arch-arm: Add support for SVE load/store structures

Change-Id: I4d9cde18dfc3d478eacc156de6a4a9721eb9e2ff
Signed-off-by: Giacomo Gabrielli <giacomo.gabrielli@arm.com>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/13524
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Andreas Sandberg <andreas.sandberg@arm.com>
This commit is contained in:
Javier Setoain
2018-04-04 16:53:17 +01:00
committed by Giacomo Gabrielli
parent 46da8fb805
commit 2e47c6c5ed
6 changed files with 1304 additions and 1 deletions

View File

@@ -45,6 +45,295 @@
namespace ArmISA {
template <typename Element,
template <typename> class MicroopLdMemType,
template <typename> class MicroopDeIntrlvType>
class SveLdStructSS : public PredMacroOp
{
protected:
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
IntRegIndex offset;
uint8_t numregs;
public:
SveLdStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
IntRegIndex _offset, uint8_t _numregs)
: PredMacroOp(mnem, machInst, __opClass),
dest(_dest), gp(_gp), base(_base), offset(_offset), numregs(_numregs)
{
numMicroops = numregs * 2;
microOps = new StaticInstPtr[numMicroops];
for (int i = 0; i < numregs; ++i) {
microOps[i] = new MicroopLdMemType<Element>(
mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
_gp, _base, _offset, _numregs, i);
}
for (int i = 0; i < numregs; ++i) {
microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
_numregs, i, this);
}
microOps[0]->setFirstMicroop();
microOps[numMicroops - 1]->setLastMicroop();
for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
(*uop)->setDelayedCommit();
}
}
Fault
execute(ExecContext *, Trace::InstRecord *) const
{
panic("Execute method called when it shouldn't!");
return NoFault;
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
for (int i = 0; i < numregs; ++i) {
printVecReg(ss, (dest + i) % 32, true);
if (i < numregs - 1)
ccprintf(ss, ", ");
}
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
ccprintf(ss, "/z, [");
printIntReg(ss, base);
ccprintf(ss, ", ");
printIntReg(ss, offset);
ccprintf(ss, "]");
return ss.str();
}
};
template <typename Element,
template <typename> class MicroopStMemType,
template <typename> class MicroopIntrlvType>
class SveStStructSS : public PredMacroOp
{
protected:
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
IntRegIndex offset;
uint8_t numregs;
public:
SveStStructSS(const char* mnem, ExtMachInst machInst, OpClass __opClass,
IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
IntRegIndex _offset, uint8_t _numregs)
: PredMacroOp(mnem, machInst, __opClass),
dest(_dest), gp(_gp), base(_base), offset(_offset), numregs(_numregs)
{
numMicroops = numregs * 2;
microOps = new StaticInstPtr[numMicroops];
for (int i = 0; i < numregs; ++i) {
microOps[i] = new MicroopIntrlvType<Element>(
mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
_dest, _numregs, i, this);
}
for (int i = 0; i < numregs; ++i) {
microOps[i + numregs] = new MicroopStMemType<Element>(
mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
_gp, _base, _offset, _numregs, i);
}
microOps[0]->setFirstMicroop();
microOps[numMicroops - 1]->setLastMicroop();
for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
(*uop)->setDelayedCommit();
}
}
Fault
execute(ExecContext *, Trace::InstRecord *) const
{
panic("Execute method called when it shouldn't!");
return NoFault;
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
for (int i = 0; i < numregs; ++i) {
printVecReg(ss, (dest + i) % 32, true);
if (i < numregs - 1)
ccprintf(ss, ", ");
}
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
ccprintf(ss, ", [");
printIntReg(ss, base);
ccprintf(ss, ", ");
printIntReg(ss, offset);
ccprintf(ss, "]");
return ss.str();
}
};
template <typename Element,
template <typename> class MicroopLdMemType,
template <typename> class MicroopDeIntrlvType>
class SveLdStructSI : public PredMacroOp
{
protected:
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
int64_t imm;
uint8_t numregs;
public:
SveLdStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
int64_t _imm, uint8_t _numregs)
: PredMacroOp(mnem, machInst, __opClass),
dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
{
numMicroops = numregs * 2;
microOps = new StaticInstPtr[numMicroops];
for (int i = 0; i < numregs; ++i) {
microOps[i] = new MicroopLdMemType<Element>(
mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
_gp, _base, _imm, _numregs, i);
}
for (int i = 0; i < numregs; ++i) {
microOps[i + numregs] = new MicroopDeIntrlvType<Element>(
mnem, machInst, static_cast<IntRegIndex>((_dest + i) % 32),
_numregs, i, this);
}
microOps[0]->setFirstMicroop();
microOps[numMicroops - 1]->setLastMicroop();
for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
(*uop)->setDelayedCommit();
}
}
Fault
execute(ExecContext *, Trace::InstRecord *) const
{
panic("Execute method called when it shouldn't!");
return NoFault;
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
for (int i = 0; i < numregs; ++i) {
printVecReg(ss, (dest + i) % 32, true);
if (i < numregs - 1)
ccprintf(ss, ", ");
}
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
ccprintf(ss, "/z, [");
printIntReg(ss, base);
if (imm != 0) {
ccprintf(ss, ", #%d, MUL VL", imm);
}
ccprintf(ss, "]");
return ss.str();
}
};
template <typename Element,
template <typename> class MicroopStMemType,
template <typename> class MicroopIntrlvType>
class SveStStructSI : public PredMacroOp
{
protected:
IntRegIndex dest;
IntRegIndex gp;
IntRegIndex base;
int64_t imm;
uint8_t numregs;
public:
SveStStructSI(const char* mnem, ExtMachInst machInst, OpClass __opClass,
IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
int64_t _imm, uint8_t _numregs)
: PredMacroOp(mnem, machInst, __opClass),
dest(_dest), gp(_gp), base(_base), imm(_imm), numregs(_numregs)
{
numMicroops = numregs * 2;
microOps = new StaticInstPtr[numMicroops];
for (int i = 0; i < numregs; ++i) {
microOps[i] = new MicroopIntrlvType<Element>(
mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
_dest, _numregs, i, this);
}
for (int i = 0; i < numregs; ++i) {
microOps[i + numregs] = new MicroopStMemType<Element>(
mnem, machInst, static_cast<IntRegIndex>(INTRLVREG0 + i),
_gp, _base, _imm, _numregs, i);
}
microOps[0]->setFirstMicroop();
microOps[numMicroops - 1]->setLastMicroop();
for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
(*uop)->setDelayedCommit();
}
}
Fault
execute(ExecContext *, Trace::InstRecord *) const
{
panic("Execute method called when it shouldn't!");
return NoFault;
}
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
printMnemonic(ss, "", false);
ccprintf(ss, "{");
for (int i = 0; i < numregs; ++i) {
printVecReg(ss, (dest + i) % 32, true);
if (i < numregs - 1)
ccprintf(ss, ", ");
}
ccprintf(ss, "}, ");
printVecPredReg(ss, gp);
ccprintf(ss, ", [");
printIntReg(ss, base);
if (imm != 0) {
ccprintf(ss, ", #%d, MUL VL", imm);
}
ccprintf(ss, "]");
return ss.str();
}
};
template <typename RegElemType, typename MemElemType,
template <typename, typename> class MicroopType,
template <typename> class FirstFaultWritebackMicroopType>

View File

@@ -3123,6 +3123,18 @@ namespace Aarch64
// Decodes SVE structure loads with scalar-plus-scalar addressing.
// Encodings whose offset register field is 31, or whose register-count
// field (bits 22:21) is zero, decode to Unknown64.
StaticInstPtr
decodeSveLoadStructsSS(ExtMachInst machInst)
{
    const IntRegIndex rm =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 20, 16)));
    const uint8_t numField = bits(machInst, 22, 21);

    if (rm == 0x1f || numField == 0)
        return new Unknown64(machInst);

    const IntRegIndex zt =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 4, 0)));
    const IntRegIndex rn =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 9, 5)));
    const IntRegIndex pg =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 12, 10)));
    const uint8_t msz = bits(machInst, 24, 23);

    // The field encodes (number of registers - 1).
    const int numRegs = numField + 1;
    return decodeSveStructLoadSSInsts(msz, machInst,
                                      zt, pg, rn, rm, numRegs);
}   // decodeSveLoadStructsSS
@@ -3135,6 +3147,19 @@ namespace Aarch64
// Decodes SVE structure loads with scalar-plus-immediate addressing.
// A zero register-count field (bits 22:21) decodes to Unknown64.
// The sign-extended 4-bit immediate is scaled by the register count
// before being handed on.
StaticInstPtr
decodeSveLoadStructsSI(ExtMachInst machInst)
{
    const uint8_t numField = bits(machInst, 22, 21);
    if (numField == 0)
        return new Unknown64(machInst);

    const IntRegIndex zt =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 4, 0)));
    const IntRegIndex rn =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 9, 5)));
    const IntRegIndex pg =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 12, 10)));
    const uint8_t msz = bits(machInst, 24, 23);
    const int64_t immField = sext<4>(bits(machInst, 19, 16));

    // The field encodes (number of registers - 1).
    const int numRegs = numField + 1;
    const int64_t scaledImm = immField * numRegs;
    return decodeSveStructLoadSIInsts(msz, machInst,
                                      zt, pg, rn, scaledImm, numRegs);
}   // decodeSveLoadStructsSI
@@ -3331,12 +3356,37 @@ namespace Aarch64
// Decodes SVE structure stores with scalar-plus-scalar addressing.
// Encodings whose offset register field is 31, or whose register-count
// field (bits 22:21) is zero, decode to Unknown64.
StaticInstPtr
decodeSveStoreStructsSS(ExtMachInst machInst)
{
    const IntRegIndex rm =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 20, 16)));
    const uint8_t numField = bits(machInst, 22, 21);

    if (rm == 0x1f || numField == 0)
        return new Unknown64(machInst);

    const IntRegIndex zt =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 4, 0)));
    const IntRegIndex rn =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 9, 5)));
    const IntRegIndex pg =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 12, 10)));
    const uint8_t msz = bits(machInst, 24, 23);

    // The field encodes (number of registers - 1).
    const int numRegs = numField + 1;
    return decodeSveStructStoreSSInsts(msz, machInst,
                                       zt, pg, rn, rm, numRegs);
}   // decodeSveStoreStructsSS
// Decodes SVE structure stores with scalar-plus-immediate addressing.
// A zero register-count field (bits 22:21) decodes to Unknown64.
// The sign-extended 4-bit immediate is scaled by the register count
// before being handed on.
StaticInstPtr
decodeSveStoreStructsSI(ExtMachInst machInst)
{
    const uint8_t numField = bits(machInst, 22, 21);
    if (numField == 0)
        return new Unknown64(machInst);

    const IntRegIndex zt =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 4, 0)));
    const IntRegIndex rn =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 9, 5)));
    const IntRegIndex pg =
        static_cast<IntRegIndex>(static_cast<uint8_t>(bits(machInst, 12, 10)));
    const uint8_t msz = bits(machInst, 24, 23);
    const int64_t immField = sext<4>(bits(machInst, 19, 16));

    // The field encodes (number of registers - 1).
    const int numRegs = numField + 1;
    const int64_t scaledImm = immField * numRegs;
    return decodeSveStructStoreSIInsts(msz, machInst,
                                       zt, pg, rn, scaledImm, numRegs);
}   // decodeSveStoreStructsSI

View File

@@ -204,6 +204,238 @@ output header {{
output decoder {{
// Builds the structure-load macro-op (scalar plus immediate) for element
// type 'etype' and the requested register count. Counts other than 2, 3
// or 4 decode to Unknown64.
template <class etype>
StaticInstPtr
decodeSveStructLoadSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        int64_t imm, int numregs)
{
    // Mnemonics indexed as [register count][esize]; the rows for counts
    // 0 and 1 are placeholders that are never read.
    static const char* nm[5][4] = {
        { nullptr, nullptr, nullptr, nullptr},
        { nullptr, nullptr, nullptr, nullptr},
        { "ld2b", "ld2h", "ld2w", "ld2d" },
        { "ld3b", "ld3h", "ld3w", "ld3d" },
        { "ld4b", "ld4h", "ld4w", "ld4d" } };

    if (numregs == 2) {
        return new SveLdStructSI<etype, SveLoadRegImmMicroop,
                                 SveDeIntrlv2Microop>(
            nm[2][esize], machInst, MemReadOp, zt, pg, xn, imm, numregs);
    }
    if (numregs == 3) {
        return new SveLdStructSI<etype, SveLoadRegImmMicroop,
                                 SveDeIntrlv3Microop>(
            nm[3][esize], machInst, MemReadOp, zt, pg, xn, imm, numregs);
    }
    if (numregs == 4) {
        return new SveLdStructSI<etype, SveLoadRegImmMicroop,
                                 SveDeIntrlv4Microop>(
            nm[4][esize], machInst, MemReadOp, zt, pg, xn, imm, numregs);
    }
    return new Unknown64(machInst);
}
// Selects the element type for a structure load (scalar plus immediate)
// from esize: 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t, 3 -> uint64_t.
// Any other esize decodes to Unknown64.
StaticInstPtr
decodeSveStructLoadSIInsts(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        int64_t imm, int numregs)
{
    typedef StaticInstPtr (*DecodeByNReg)(uint8_t, ExtMachInst,
            IntRegIndex, IntRegIndex, IntRegIndex, int64_t, int);

    // One entry per supported esize value.
    static const DecodeByNReg byElemSize[] = {
        &decodeSveStructLoadSIInstsByNReg<uint8_t>,
        &decodeSveStructLoadSIInstsByNReg<uint16_t>,
        &decodeSveStructLoadSIInstsByNReg<uint32_t>,
        &decodeSveStructLoadSIInstsByNReg<uint64_t>
    };

    if (esize > 3)
        return new Unknown64(machInst);
    return byElemSize[esize](esize, machInst, zt, pg, xn, imm, numregs);
}
// Builds the structure-store macro-op (scalar plus immediate) for element
// type 'etype' and the requested register count. Counts other than 2, 3
// or 4 decode to Unknown64.
template <class etype>
StaticInstPtr
decodeSveStructStoreSIInstsByNReg(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        int64_t imm, int numregs)
{
    // Mnemonics indexed as [register count][esize]; the rows for counts
    // 0 and 1 are placeholders that are never read.
    static const char* nm[5][4] = {
        { nullptr, nullptr, nullptr, nullptr},
        { nullptr, nullptr, nullptr, nullptr},
        { "st2b", "st2h", "st2w", "st2d" },
        { "st3b", "st3h", "st3w", "st3d" },
        { "st4b", "st4h", "st4w", "st4d" } };

    if (numregs == 2) {
        return new SveStStructSI<etype, SveStoreRegImmMicroop,
                                 SveIntrlv2Microop>(
            nm[2][esize], machInst, MemWriteOp, zt, pg, xn, imm, numregs);
    }
    if (numregs == 3) {
        return new SveStStructSI<etype, SveStoreRegImmMicroop,
                                 SveIntrlv3Microop>(
            nm[3][esize], machInst, MemWriteOp, zt, pg, xn, imm, numregs);
    }
    if (numregs == 4) {
        return new SveStStructSI<etype, SveStoreRegImmMicroop,
                                 SveIntrlv4Microop>(
            nm[4][esize], machInst, MemWriteOp, zt, pg, xn, imm, numregs);
    }
    return new Unknown64(machInst);
}
// Selects the element type for a structure store (scalar plus immediate)
// from esize: 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t, 3 -> uint64_t.
// Any other esize decodes to Unknown64.
StaticInstPtr
decodeSveStructStoreSIInsts(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        int64_t imm, int numregs)
{
    typedef StaticInstPtr (*DecodeByNReg)(uint8_t, ExtMachInst,
            IntRegIndex, IntRegIndex, IntRegIndex, int64_t, int);

    // One entry per supported esize value.
    static const DecodeByNReg byElemSize[] = {
        &decodeSveStructStoreSIInstsByNReg<uint8_t>,
        &decodeSveStructStoreSIInstsByNReg<uint16_t>,
        &decodeSveStructStoreSIInstsByNReg<uint32_t>,
        &decodeSveStructStoreSIInstsByNReg<uint64_t>
    };

    if (esize > 3)
        return new Unknown64(machInst);
    return byElemSize[esize](esize, machInst, zt, pg, xn, imm, numregs);
}
// Builds the structure-load macro-op (scalar plus scalar) for element
// type 'etype' and the requested register count. Counts other than 2, 3
// or 4 decode to Unknown64.
template <class etype>
StaticInstPtr
decodeSveStructLoadSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        IntRegIndex xm, int numregs)
{
    // Mnemonics indexed as [register count][esize]; the rows for counts
    // 0 and 1 are placeholders that are never read.
    static const char* nm[5][4] = {
        { nullptr, nullptr, nullptr, nullptr},
        { nullptr, nullptr, nullptr, nullptr},
        { "ld2b", "ld2h", "ld2w", "ld2d" },
        { "ld3b", "ld3h", "ld3w", "ld3d" },
        { "ld4b", "ld4h", "ld4w", "ld4d" } };

    if (numregs == 2) {
        return new SveLdStructSS<etype, SveLoadRegRegMicroop,
                                 SveDeIntrlv2Microop>(
            nm[2][esize], machInst, MemReadOp, zt, pg, xn, xm, numregs);
    }
    if (numregs == 3) {
        return new SveLdStructSS<etype, SveLoadRegRegMicroop,
                                 SveDeIntrlv3Microop>(
            nm[3][esize], machInst, MemReadOp, zt, pg, xn, xm, numregs);
    }
    if (numregs == 4) {
        return new SveLdStructSS<etype, SveLoadRegRegMicroop,
                                 SveDeIntrlv4Microop>(
            nm[4][esize], machInst, MemReadOp, zt, pg, xn, xm, numregs);
    }
    return new Unknown64(machInst);
}
// Selects the element type for a structure load (scalar plus scalar)
// from esize: 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t, 3 -> uint64_t.
// Any other esize decodes to Unknown64.
StaticInstPtr
decodeSveStructLoadSSInsts(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        IntRegIndex xm, int numregs)
{
    typedef StaticInstPtr (*DecodeByNReg)(uint8_t, ExtMachInst,
            IntRegIndex, IntRegIndex, IntRegIndex, IntRegIndex, int);

    // One entry per supported esize value.
    static const DecodeByNReg byElemSize[] = {
        &decodeSveStructLoadSSInstsByNReg<uint8_t>,
        &decodeSveStructLoadSSInstsByNReg<uint16_t>,
        &decodeSveStructLoadSSInstsByNReg<uint32_t>,
        &decodeSveStructLoadSSInstsByNReg<uint64_t>
    };

    if (esize > 3)
        return new Unknown64(machInst);
    return byElemSize[esize](esize, machInst, zt, pg, xn, xm, numregs);
}
// Builds the structure-store macro-op (scalar plus scalar) for element
// type 'etype' and the requested register count. Counts other than 2, 3
// or 4 decode to Unknown64.
template <class etype>
StaticInstPtr
decodeSveStructStoreSSInstsByNReg(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        IntRegIndex xm, int numregs)
{
    // Mnemonics indexed as [register count][esize]; the rows for counts
    // 0 and 1 are placeholders that are never read.
    static const char* nm[5][4] = {
        { nullptr, nullptr, nullptr, nullptr},
        { nullptr, nullptr, nullptr, nullptr},
        { "st2b", "st2h", "st2w", "st2d" },
        { "st3b", "st3h", "st3w", "st3d" },
        { "st4b", "st4h", "st4w", "st4d" } };

    if (numregs == 2) {
        return new SveStStructSS<etype, SveStoreRegRegMicroop,
                                 SveIntrlv2Microop>(
            nm[2][esize], machInst, MemWriteOp, zt, pg, xn, xm, numregs);
    }
    if (numregs == 3) {
        return new SveStStructSS<etype, SveStoreRegRegMicroop,
                                 SveIntrlv3Microop>(
            nm[3][esize], machInst, MemWriteOp, zt, pg, xn, xm, numregs);
    }
    if (numregs == 4) {
        return new SveStStructSS<etype, SveStoreRegRegMicroop,
                                 SveIntrlv4Microop>(
            nm[4][esize], machInst, MemWriteOp, zt, pg, xn, xm, numregs);
    }
    return new Unknown64(machInst);
}
// Selects the element type for a structure store (scalar plus scalar)
// from esize: 0 -> uint8_t, 1 -> uint16_t, 2 -> uint32_t, 3 -> uint64_t.
// Any other esize decodes to Unknown64.
StaticInstPtr
decodeSveStructStoreSSInsts(uint8_t esize, ExtMachInst machInst,
        IntRegIndex zt, IntRegIndex pg, IntRegIndex xn,
        IntRegIndex xm, int numregs)
{
    typedef StaticInstPtr (*DecodeByNReg)(uint8_t, ExtMachInst,
            IntRegIndex, IntRegIndex, IntRegIndex, IntRegIndex, int);

    // One entry per supported esize value.
    static const DecodeByNReg byElemSize[] = {
        &decodeSveStructStoreSSInstsByNReg<uint8_t>,
        &decodeSveStructStoreSSInstsByNReg<uint16_t>,
        &decodeSveStructStoreSSInstsByNReg<uint32_t>,
        &decodeSveStructStoreSSInstsByNReg<uint64_t>
    };

    if (esize > 3)
        return new Unknown64(machInst);
    return byElemSize[esize](esize, machInst, zt, pg, xn, xm, numregs);
}
StaticInstPtr
decodeSveGatherLoadVIInsts(uint8_t dtype, ExtMachInst machInst,
IntRegIndex zt, IntRegIndex pg, IntRegIndex zn,
@@ -1018,6 +1250,231 @@ let {{
header_output += SveGatherLoadCpySrcVecMicroopDeclare.subst(iop)
exec_output += SveGatherLoadCpySrcVecMicroopExecute.subst(iop)
def emitSveInterleaveMicroop():
    # Emits the SveIntrlv2/3/4Microop classes: declarations go to
    # header_output, execute() bodies and their explicit instantiations
    # for the four unsigned element types go to exec_output.
    global header_output, exec_output, decoders

    # The generated C++ for each register count shares a common prefix;
    # only the number of "else if" source-vector branches differs.
    # The string contents are kept flush-left so the generated text is
    # unchanged.
    codeHead = '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
for (unsigned int i = 0; i < eCount; ++i) {
unsigned int absIdx = regIndex * eCount + i;
unsigned int srcIdx = absIdx / numRegs;
unsigned int srcVec = absIdx % numRegs;
if (srcVec == 0)
AA64FpDest_x[i] = AA64FpOp1V0S_x[srcIdx];'''
    codeBranch = '''
else if (srcVec == %(vec)d)
AA64FpDest_x[i] = AA64FpOp1V%(vec)dS_x[srcIdx];'''
    codeTail = '''
}'''

    regCounts = (2, 3, 4)

    iops = []
    for nreg in regCounts:
        code = sveEnabledCheckCode + codeHead
        for vec in range(1, nreg):
            code += codeBranch % {'vec': vec}
        code += codeTail
        iops.append(InstObjParams('intrlv',
                                  'SveIntrlv%dMicroop' % nreg,
                                  'MicroOp',
                                  {'code': code},
                                  ['IsMicroop']))

    for iop in iops:
        header_output += SveIntrlvMicroopDeclare.subst(iop)
    for iop in iops:
        exec_output += SveIntrlvMicroopExecute.subst(iop)

    for elemType in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
        for nreg in regCounts:
            substDict = {'targs' : elemType,
                         'class_name' : 'SveIntrlv' + str(nreg) + 'Microop'}
            exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
def emitSveDeInterleaveMicroop():
    # Emits the SveDeIntrlv2/3/4Microop classes: declarations go to
    # header_output, execute() bodies and their explicit instantiations
    # for the four unsigned element types go to exec_output. The execute
    # and instantiation templates are the ones shared with the interleave
    # microops.
    global header_output, exec_output, decoders

    # The generated C++ for each register count shares a common prefix;
    # only the number of "else if" source-register branches differs.
    # The string contents are kept flush-left so the generated text is
    # unchanged.
    codeHead = '''
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
for (unsigned int i = 0; i < eCount; ++i) {
unsigned int absIdx = (regIndex + numRegs * i);
unsigned int srcIdx = absIdx % eCount;
unsigned int srcVec = absIdx / eCount;
if (srcVec == 0)
AA64FpDest_x[i] = AA64IntrlvReg0_x[srcIdx];'''
    codeBranch = '''
else if(srcVec == %(vec)d)
AA64FpDest_x[i] = AA64IntrlvReg%(vec)d_x[srcIdx];'''
    codeTail = '''
}'''

    regCounts = (2, 3, 4)

    iops = []
    for nreg in regCounts:
        code = sveEnabledCheckCode + codeHead
        for vec in range(1, nreg):
            code += codeBranch % {'vec': vec}
        code += codeTail
        iops.append(InstObjParams('deintrlv',
                                  'SveDeIntrlv%dMicroop' % nreg,
                                  'MicroOp',
                                  {'code': code},
                                  ['IsMicroop']))

    for iop in iops:
        header_output += SveDeIntrlvMicroopDeclare.subst(iop)
    for iop in iops:
        exec_output += SveIntrlvMicroopExecute.subst(iop)

    for elemType in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
        for nreg in regCounts:
            substDict = {'targs' : elemType,
                         'class_name' : 'SveDeIntrlv' + str(nreg) + 'Microop'}
            exec_output += SveIntrlvMicroopExecDeclare.subst(substDict)
# Generates definitions for SVE struct load/store microops.
# offsetIsImm selects the scalar-plus-immediate flavour
# (SveLoadRegImmMicroop / SveStoreRegImmMicroop); otherwise the
# scalar-plus-scalar flavour (SveLoadRegRegMicroop /
# SveStoreRegRegMicroop) is generated.
def emitSveStructMemInsts(offsetIsImm):
    global header_output, exec_output, decoders

    # Everything that depends on the addressing form is chosen up front.
    if offsetIsImm:
        offsetCode = '((int64_t) this->imm * eCount * sizeof(Element))'
        loadClass = 'SveLoadRegImmMicroop'
        storeClass = 'SveStoreRegImmMicroop'
    else:
        offsetCode = '(XOffset * sizeof(Element));'
        loadClass = 'SveLoadRegRegMicroop'
        storeClass = 'SveStoreRegRegMicroop'

    # Generated C++ is kept flush-left so the emitted text is unchanged.
    eaCode = SPAlignmentCheckCode + '''
int memAccessSize = eCount * sizeof(Element);
EA = memAccessSize * regIndex + XBase + ''' + offsetCode

    # Loads: inactive elements (per the governing predicate) are zeroed.
    loadMemAccCode = '''
for (int i = 0; i < eCount; i++) {
int gpIdx = (regIndex * eCount + i) / numRegs;
if (GpOp_x[gpIdx]) {
AA64FpDest_x[i] = memDataView[i];
} else {
AA64FpDest_x[i] = 0;
}
}
'''
    # Stores: inactive elements have their byte write-enables cleared.
    storeMemAccCode = '''
for (int i = 0; i < eCount; i++) {
int gpIdx = (regIndex * eCount + i) / numRegs;
if (GpOp_x[gpIdx]) {
memDataView[i] = AA64FpDest_x[i];
} else {
memDataView[i] = 0;
for (int j = 0; j < sizeof(Element); j++) {
wrEn[sizeof(Element) * i + j] = false;
}
}
}
'''
    storeWrEnableCode = '''
auto wrEn = std::vector<bool>(sizeof(Element) * eCount, true);
'''

    loadIop = InstObjParams('ldxx', loadClass, 'MicroOp',
                            {'targs': 'Element',
                             'memacc_code': loadMemAccCode,
                             'ea_code' : sveEnabledCheckCode + eaCode,
                             'fa_code' : ''},
                            ['IsMemRef', 'IsLoad', 'IsMicroop'])
    storeIop = InstObjParams('stxx', storeClass, 'MicroOp',
                             {'targs': 'Element',
                              'wren_code': storeWrEnableCode,
                              'memacc_code': storeMemAccCode,
                              'ea_code' : sveEnabledCheckCode + eaCode,
                              'fa_code' : ''},
                             ['IsMemRef', 'IsStore', 'IsMicroop'])

    if offsetIsImm:
        header_output += SveStructMemSIMicroopDeclare.subst(loadIop)
        header_output += SveStructMemSIMicroopDeclare.subst(storeIop)
    else:
        header_output += SveStructMemSSMicroopDeclare.subst(loadIop)
        header_output += SveStructMemSSMicroopDeclare.subst(storeIop)

    execPieces = [
        SveStructLoadExecute.subst(loadIop),
        SveStructLoadInitiateAcc.subst(loadIop),
        SveStructLoadCompleteAcc.subst(loadIop),
        SveStructStoreExecute.subst(storeIop),
        SveStructStoreInitiateAcc.subst(storeIop),
        SveStructStoreCompleteAcc.subst(storeIop),
    ]
    exec_output += ''.join(execPieces)

    # Explicit instantiations: load first, then store, per element type.
    for elemType in ('uint8_t', 'uint16_t', 'uint32_t', 'uint64_t'):
        for className in (loadClass, storeClass):
            substDict = {'targs': elemType, 'class_name': className}
            exec_output += SveStructMemExecDeclare.subst(substDict)
# LD1[S]{B,H,W,D} (scalar plus immediate)
# ST1[S]{B,H,W,D} (scalar plus immediate)
# LDNF1[S]{B,H,W,D} (scalar plus immediate)
@@ -1030,6 +1487,13 @@ let {{
# LD1R[S]{B,H,W,D}
emitSveLoadAndRepl()
# LD{2,3,4}{B,H,W,D} (scalar plus immediate)
# ST{2,3,4}{B,H,W,D} (scalar plus immediate)
# Generates the SveLoadRegImmMicroop / SveStoreRegImmMicroop microops.
emitSveStructMemInsts(offsetIsImm = True)
# LD{2,3,4}{B,H,W,D} (scalar plus scalar)
# ST{2,3,4}{B,H,W,D} (scalar plus scalar)
# Generates the SveLoadRegRegMicroop / SveStoreRegRegMicroop microops.
emitSveStructMemInsts(offsetIsImm = False)
# LDR (predicate), STR (predicate)
emitSveMemFillSpill(True)
# LDR (vector), STR (vector)
@@ -1049,4 +1513,8 @@ let {{
# Source vector copy microop for gather loads
emitSveGatherLoadCpySrcVecMicroop()
# ST/LD struct de/interleave microops
# Interleave microops: SveIntrlv2Microop, SveIntrlv3Microop, SveIntrlv4Microop
emitSveInterleaveMicroop()
# De-interleave microops: SveDeIntrlv2Microop, SveDeIntrlv3Microop,
# SveDeIntrlv4Microop
emitSveDeInterleaveMicroop()
}};

View File

@@ -530,6 +530,51 @@ def operands {{
'AA64FpDestQV1L': vectorRegElem('0', 'tud', zeroing = True)
}),
# Temporary registers for SVE interleaving
# Four operands, AA64IntrlvReg0..3, bound to the register indices
# INTRLVREG0..INTRLVREG3. Each one declares the same set of element views:
# P0..P3 select elements '0'..'3'; S, D and Q all select element '0' with
# the type tags 'sf', 'df' and 'tud' respectively and zeroing enabled.
'AA64IntrlvReg0': vectorReg('INTRLVREG0',
{
'AA64IntrlvReg0P0': vectorRegElem('0'),
'AA64IntrlvReg0P1': vectorRegElem('1'),
'AA64IntrlvReg0P2': vectorRegElem('2'),
'AA64IntrlvReg0P3': vectorRegElem('3'),
'AA64IntrlvReg0S': vectorRegElem('0', 'sf', zeroing = True),
'AA64IntrlvReg0D': vectorRegElem('0', 'df', zeroing = True),
'AA64IntrlvReg0Q': vectorRegElem('0', 'tud', zeroing = True)
}),
# Second temporary interleave register (INTRLVREG1)
'AA64IntrlvReg1': vectorReg('INTRLVREG1',
{
'AA64IntrlvReg1P0': vectorRegElem('0'),
'AA64IntrlvReg1P1': vectorRegElem('1'),
'AA64IntrlvReg1P2': vectorRegElem('2'),
'AA64IntrlvReg1P3': vectorRegElem('3'),
'AA64IntrlvReg1S': vectorRegElem('0', 'sf', zeroing = True),
'AA64IntrlvReg1D': vectorRegElem('0', 'df', zeroing = True),
'AA64IntrlvReg1Q': vectorRegElem('0', 'tud', zeroing = True)
}),
# Third temporary interleave register (INTRLVREG2)
'AA64IntrlvReg2': vectorReg('INTRLVREG2',
{
'AA64IntrlvReg2P0': vectorRegElem('0'),
'AA64IntrlvReg2P1': vectorRegElem('1'),
'AA64IntrlvReg2P2': vectorRegElem('2'),
'AA64IntrlvReg2P3': vectorRegElem('3'),
'AA64IntrlvReg2S': vectorRegElem('0', 'sf', zeroing = True),
'AA64IntrlvReg2D': vectorRegElem('0', 'df', zeroing = True),
'AA64IntrlvReg2Q': vectorRegElem('0', 'tud', zeroing = True)
}),
# Fourth temporary interleave register (INTRLVREG3)
'AA64IntrlvReg3': vectorReg('INTRLVREG3',
{
'AA64IntrlvReg3P0': vectorRegElem('0'),
'AA64IntrlvReg3P1': vectorRegElem('1'),
'AA64IntrlvReg3P2': vectorRegElem('2'),
'AA64IntrlvReg3P3': vectorRegElem('3'),
'AA64IntrlvReg3S': vectorRegElem('0', 'sf', zeroing = True),
'AA64IntrlvReg3D': vectorRegElem('0', 'df', zeroing = True),
'AA64IntrlvReg3Q': vectorRegElem('0', 'tud', zeroing = True)
}),
'AA64FpDestMerge': vectorReg('dest',
{
'AA64FpDestMergeP0': vectorRegElem('0'),

View File

@@ -815,3 +815,449 @@ def template SveGatherLoadCpySrcVecMicroopExecute {{
return fault;
}
}};
// Declares the memory microop class used by SVE structure loads/stores
// with scalar-plus-immediate addressing. One instance transfers the data
// for register 'regIndex' out of 'numRegs'.
def template SveStructMemSIMicroopDeclare {{
    template<class _Element>
    class %(class_name)s : public %(base_class)s
    {
      protected:
        typedef _Element Element;
        typedef _Element TPElem;

        IntRegIndex dest;
        IntRegIndex gp;
        IntRegIndex base;
        int64_t imm;

        uint8_t numRegs;
        int regIndex;

        unsigned memAccessFlags;

        bool baseIsSP;

      public:
        %(class_name)s(const char* mnem, ExtMachInst machInst,
            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
            int64_t _imm, uint8_t _numRegs, int _regIndex)
            : %(base_class)s(mnem, machInst, %(op_class)s),
              dest(_dest), gp(_gp), base(_base), imm(_imm),
              numRegs(_numRegs), regIndex(_regIndex),
              memAccessFlags(ArmISA::TLB::AllowUnaligned |
                             ArmISA::TLB::MustBeOne)
        {
            %(constructor)s;
            baseIsSP = isSP(_base);
        }

        Fault execute(ExecContext *, Trace::InstRecord *) const;
        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;

        virtual void
        annotateFault(ArmFault *fault)
        {
            %(fa_code)s
        }

        /**
         * Formats "<mnem> {<dest>}, <pg>[/z], [<base>{, #<off>}]
         * (uop reg <regIndex> tfer)". The temporary interleave registers
         * are printed as INTRLV0..INTRLV3.
         */
        std::string
        generateDisassembly(Addr pc, const SymbolTable *symtab) const
        {
            std::stringstream ss;
            printMnemonic(ss, "", false);
            ccprintf(ss, "{");
            switch (dest) {
              case INTRLVREG0:
                ccprintf(ss, "INTRLV0");
                break;
              case INTRLVREG1:
                ccprintf(ss, "INTRLV1");
                break;
              case INTRLVREG2:
                ccprintf(ss, "INTRLV2");
                break;
              case INTRLVREG3:
                ccprintf(ss, "INTRLV3");
                break;
              default:
                printVecReg(ss, dest, true);
                break;
            }
            ccprintf(ss, "}, ");
            printVecPredReg(ss, gp);
            if (_opClass == MemReadOp) {
                ccprintf(ss, "/z");
            }
            ccprintf(ss, ", [");
            // The base is a scalar (integer) register, not a vector one.
            printIntReg(ss, base);
            if (imm != 0) {
                // Keep the product signed: multiplying by the unsigned
                // sizeof() result directly would print negative offsets
                // as huge positive values.
                ccprintf(ss, ", #%d",
                         imm * static_cast<int64_t>(sizeof(Element)));
            }
            ccprintf(ss, "] (uop reg %d tfer)", regIndex);
            return ss.str();
        }
    };
}};
// Explicit instantiation of the three access methods (execute,
// initiateAcc, completeAcc) of a structure load/store microop class for
// one element type, given by the 'targs' substitution.
def template SveStructMemExecDeclare {{
template
Fault %(class_name)s<%(targs)s>::execute(ExecContext *,
Trace::InstRecord *) const;
template
Fault %(class_name)s<%(targs)s>::initiateAcc(ExecContext *,
Trace::InstRecord *) const;
template
Fault %(class_name)s<%(targs)s>::completeAcc(PacketPtr,
ExecContext *, Trace::InstRecord *) const;
}};
// execute() for structure load microops: evaluates ea_code, reads
// memAccessSize bytes at EA into a temporary vector container via
// xc->readMem, runs memacc_code over it and writes the operands back if
// no fault was raised.
def template SveStructLoadExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
// Element count as reported by getCurSveVecLen for this element type
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
%(op_decl)s;
%(op_rd)s;
// ea_code is expected to assign EA and to declare memAccessSize, which
// is used below but not declared in this template.
%(ea_code)s;
TheISA::VecRegContainer memData;
auto memDataView = memData.as<Element>();
if (fault == NoFault) {
fault = xc->readMem(EA, memData.raw_ptr<uint8_t>(), memAccessSize,
this->memAccessFlags);
// NOTE(review): memacc_code runs even when readMem returned a fault;
// only the write-back below is guarded by the read result.
%(memacc_code)s;
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
// initiateAcc() for structure load microops: evaluates ea_code and, if
// no fault was raised, starts a read of memAccessSize bytes at EA via
// xc->initiateMemRead. The data is consumed later in completeAcc().
def template SveStructLoadInitiateAcc {{
template <class Element>
Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
// Element count as reported by getCurSveVecLen for this element type
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
// Only source operands are declared here; nothing is written back.
%(op_src_decl)s;
%(op_rd)s;
// ea_code is expected to assign EA and to declare memAccessSize.
%(ea_code)s;
if (fault == NoFault) {
fault = xc->initiateMemRead(EA, memAccessSize,
this->memAccessFlags);
}
return fault;
}
}};
// completeAcc() for structure load microops: copies the packet payload
// into a temporary vector container, runs memacc_code over it and writes
// the operands back.
def template SveStructLoadCompleteAcc {{
template <class Element>
Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
// Element count as reported by getCurSveVecLen for this element type
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
%(op_decl)s;
%(op_rd)s;
TheISA::VecRegContainer memData;
auto memDataView = memData.as<Element>();
// Copies pkt->getSize() bytes; presumably this never exceeds the
// container size -- TODO confirm against the request size used in
// initiateAcc().
memcpy(memData.raw_ptr<uint8_t>(), pkt->getPtr<uint8_t>(),
pkt->getSize());
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
// execute() for structure store microops: evaluates ea_code, lets
// wren_code and memacc_code fill a temporary vector container and the
// write-enable mask, then writes memAccessSize bytes to EA via
// xc->writeMem and finally performs the operand write-back.
def template SveStructStoreExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
// Element count as reported by getCurSveVecLen for this element type
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
%(op_decl)s;
%(op_rd)s;
// ea_code is expected to assign EA and to declare memAccessSize.
%(ea_code)s;
TheISA::VecRegContainer memData;
auto memDataView = memData.as<Element>();
// wren_code is expected to declare wrEn, which is passed to writeMem.
%(wren_code)s;
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize, EA,
this->memAccessFlags, NULL, wrEn);
}
if (fault == NoFault) {
%(op_wb)s;
}
return fault;
}
}};
// initiateAcc() for structure store microops: same sequence as the
// store execute() up to and including xc->writeMem, but without an
// operand write-back step.
def template SveStructStoreInitiateAcc {{
template <class Element>
Fault %(class_name)s<Element>::initiateAcc(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Addr EA;
Fault fault = NoFault;
bool aarch64 M5_VAR_USED = true;
// Element count as reported by getCurSveVecLen for this element type
unsigned eCount = ArmStaticInst::getCurSveVecLen<Element>(
xc->tcBase());
%(op_decl)s;
%(op_rd)s;
// ea_code is expected to assign EA and to declare memAccessSize.
%(ea_code)s;
TheISA::VecRegContainer memData;
auto memDataView = memData.as<Element>();
// wren_code is expected to declare wrEn, which is passed to writeMem.
%(wren_code)s;
if (fault == NoFault) {
%(memacc_code)s;
}
if (fault == NoFault) {
fault = xc->writeMem(memData.raw_ptr<uint8_t>(), memAccessSize, EA,
this->memAccessFlags, NULL, wrEn);
}
return fault;
}
}};
// completeAcc() for structure store microops: there is nothing left to
// do once the write has been issued, so it always returns NoFault.
def template SveStructStoreCompleteAcc {{
template <class Element>
Fault %(class_name)s<Element>::completeAcc(PacketPtr pkt,
ExecContext *xc, Trace::InstRecord *traceData) const
{
return NoFault;
}
}};
// Declares the memory microop class used by SVE structure loads/stores
// with scalar-plus-scalar addressing. One instance transfers the data
// for register 'regIndex' out of 'numRegs'.
def template SveStructMemSSMicroopDeclare {{
    template <class _Element>
    class %(class_name)s : public %(base_class)s
    {
      protected:
        typedef _Element Element;
        typedef _Element TPElem;

        IntRegIndex dest;
        IntRegIndex gp;
        IntRegIndex base;
        IntRegIndex offset;

        uint8_t numRegs;
        int regIndex;

        unsigned memAccessFlags;

        bool baseIsSP;

      public:
        %(class_name)s(const char* mnem, ExtMachInst machInst,
            IntRegIndex _dest, IntRegIndex _gp, IntRegIndex _base,
            IntRegIndex _offset, uint8_t _numRegs, int _regIndex)
            : %(base_class)s(mnem, machInst, %(op_class)s),
              dest(_dest), gp(_gp), base(_base), offset(_offset),
              numRegs(_numRegs), regIndex(_regIndex),
              memAccessFlags(ArmISA::TLB::AllowUnaligned |
                             ArmISA::TLB::MustBeOne)
        {
            %(constructor)s;
            baseIsSP = isSP(_base);
        }

        Fault execute(ExecContext *, Trace::InstRecord *) const;
        Fault initiateAcc(ExecContext *, Trace::InstRecord *) const;
        Fault completeAcc(PacketPtr, ExecContext *, Trace::InstRecord *) const;

        virtual void
        annotateFault(ArmFault *fault)
        {
            %(fa_code)s
        }

        /**
         * Formats "<mnem> {<dest>}, <pg>[/z], [<base>, <offset>]
         * (uop reg <regIndex> tfer)". The temporary interleave registers
         * are printed as INTRLV0..INTRLV3.
         */
        std::string
        generateDisassembly(Addr pc, const SymbolTable *symtab) const
        {
            std::stringstream ss;
            printMnemonic(ss, "", false);
            ccprintf(ss, "{");
            switch (dest) {
              case INTRLVREG0:
                ccprintf(ss, "INTRLV0");
                break;
              case INTRLVREG1:
                ccprintf(ss, "INTRLV1");
                break;
              case INTRLVREG2:
                ccprintf(ss, "INTRLV2");
                break;
              case INTRLVREG3:
                ccprintf(ss, "INTRLV3");
                break;
              default:
                printVecReg(ss, dest, true);
                break;
            }
            ccprintf(ss, "}, ");
            printVecPredReg(ss, gp);
            if (_opClass == MemReadOp) {
                ccprintf(ss, "/z");
            }
            ccprintf(ss, ", [");
            printIntReg(ss, base);
            ccprintf(ss, ", ");
            // The offset is a scalar (integer) register, not a vector one.
            printIntReg(ss, offset);
            ccprintf(ss, "] (uop reg %d tfer)", regIndex);
            return ss.str();
        }
    };
}};
// Declares an interleave microop class: it has a destination register,
// one source register (op1), the register count and this microop's
// index within the group.
// NOTE(review): the constructor names MicroOp and SimdAluOp directly
// rather than using the %(base_class)s / %(op_class)s substitutions.
def template SveIntrlvMicroopDeclare {{
template <class _Element>
class %(class_name)s: public %(base_class)s
{
protected:
typedef _Element Element;
typedef _Element TPElem;
IntRegIndex dest;
IntRegIndex op1;
uint8_t numRegs;
int regIndex;
// Raw pointer to the macro-op passed to the constructor; used in this
// class only to produce the disassembly text.
StaticInst *macroOp;
public:
%(class_name)s(const char* mnem, ExtMachInst machInst,
IntRegIndex _dest, IntRegIndex _op1,
uint8_t _numRegs, int _regIndex, StaticInst *_macroOp)
: MicroOp(mnem, machInst, SimdAluOp),
dest(_dest), op1(_op1), numRegs(_numRegs), regIndex(_regIndex),
macroOp(_macroOp)
{
%(constructor)s;
}
Fault execute(ExecContext *, Trace::InstRecord *) const;
// Prints the macro-op's disassembly followed by " (uop interleave)".
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
ccprintf(ss, " (uop interleave)");
return ss.str();
}
};
}};
// Declares a de-interleave microop class: it has a destination register,
// the register count and this microop's index within the group. Unlike
// the interleave microop it takes no explicit source register argument.
// NOTE(review): the constructor names MicroOp and SimdAluOp directly
// rather than using the %(base_class)s / %(op_class)s substitutions.
def template SveDeIntrlvMicroopDeclare {{
template <class _Element>
class %(class_name)s : public %(base_class)s
{
protected:
typedef _Element Element;
typedef _Element TPElem;
IntRegIndex dest;
uint8_t numRegs;
int regIndex;
// Raw pointer to the macro-op passed to the constructor; used in this
// class only to produce the disassembly text.
StaticInst *macroOp;
public:
%(class_name)s(const char* mnem, ExtMachInst machInst,
IntRegIndex _dest, uint8_t _numRegs, int _regIndex,
StaticInst *_macroOp)
: MicroOp(mnem, machInst, SimdAluOp),
dest(_dest), numRegs(_numRegs), regIndex(_regIndex),
macroOp(_macroOp)
{
%(constructor)s;
}
Fault execute(ExecContext *, Trace::InstRecord *) const;
// Prints the macro-op's disassembly followed by " (uop deinterleave)".
std::string
generateDisassembly(Addr pc, const SymbolTable *symtab) const
{
std::stringstream ss;
ccprintf(ss, "%s", macroOp->disassemble(pc, symtab));
ccprintf(ss, " (uop deinterleave)");
return ss.str();
}
};
}};
// Explicit instantiation of execute() for an interleave or de-interleave
// microop class and one element type ('targs').
def template SveIntrlvMicroopExecDeclare {{
template
Fault %(class_name)s<%(targs)s>::execute(
ExecContext *, Trace::InstRecord *) const;
}};
// execute() body shared by the interleave and de-interleave microops:
// declares and reads the operands, runs the substituted 'code' and
// writes the operands back if no fault was raised.
def template SveIntrlvMicroopExecute {{
template <class Element>
Fault %(class_name)s<Element>::execute(ExecContext *xc,
Trace::InstRecord *traceData) const
{
Fault fault = NoFault;
%(op_decl)s;
%(op_rd)s;
%(code)s;
if (fault == NoFault)
{
%(op_wb)s;
}
return fault;
}
}};

View File

@@ -85,15 +85,20 @@ const int NumVecV7ArchRegs = 64;
const int NumVecV8ArchRegs = 32;
const int NumVecSpecialRegs = 8;
// Temporary vector registers used by the SVE structure load/store
// (de)interleave microops.
const int NumVecIntrlvRegs = 4;
const int NumIntRegs = NUM_INTREGS;
const int NumFloatRegs = NumFloatV8ArchRegs + NumFloatSpecialRegs;
// Single definition: architectural + special + interleave temporaries.
// The older two-term definition was a redefinition of the same constant
// and has been dropped.
const int NumVecRegs = NumVecV8ArchRegs + NumVecSpecialRegs + NumVecIntrlvRegs;
const int VECREG_UREG0 = 32;
const int NumVecPredRegs = 18;  // P0-P15, FFR, UREG0
const int PREDREG_FFR = 16;
const int PREDREG_UREG0 = 17;
const int NumCCRegs = NUM_CCREGS;
const int NumMiscRegs = NUM_MISCREGS;
// Indices of the interleave temporaries; they follow the architectural
// and special vector registers.
const int INTRLVREG0 = NumVecV8ArchRegs + NumVecSpecialRegs;
const int INTRLVREG1 = INTRLVREG0 + 1;
const int INTRLVREG2 = INTRLVREG0 + 2;
const int INTRLVREG3 = INTRLVREG0 + 3;
#define ISA_HAS_CC_REGS