arch-riscv: Move RVV implementation from header to source (#500)

Moving the implementation of the RVV template class definitions from the
header to the source file can speed up the gem5 build process.
This commit is contained in:
Jason Lowe-Power
2023-10-26 17:38:18 -07:00
committed by GitHub
7 changed files with 809 additions and 447 deletions

View File

@@ -32,6 +32,9 @@
#include <string>
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/isa.hh"
#include "arch/riscv/regs/misc.hh"
#include "arch/riscv/regs/vector.hh"
#include "arch/riscv/utility.hh"
#include "cpu/static_inst.hh"
@@ -408,5 +411,95 @@ VMvWholeMicroInst::generateDisassembly(Addr pc,
return ss.str();
}
// Constructs the microop that merges the partial mask results produced by
// the preceding per-group microops into the single destination mask register.
//
// @param extMachInst  Raw machine instruction this microop belongs to.
// @param _dstReg      Index of the destination vector register.
// @param _numSrcs     Number of internal source registers holding partial
//                     mask results (VecMemInternalReg0 .. VecMemInternalReg0
//                     + _numSrcs - 1).
// @param _vlen        Vector length in bits.
// @param _elemSize    Element size in bytes (replaces the old ElemType
//                     template parameter so this class needs no template
//                     instantiation).
VMaskMergeMicroInst::VMaskMergeMicroInst(ExtMachInst extMachInst,
    uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen, size_t _elemSize)
    : VectorArithMicroInst("vmask_mv_micro", extMachInst,
        VectorIntegerArithOp, 0, 0),
      vlen(_vlen),
      elemSize(_elemSize)
{
    // Point the base class at this subclass's register-index storage before
    // any indices are recorded below.
    setRegIdxArrays(
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::destRegIdxArr));

    _numSrcRegs = 0;
    _numDestRegs = 0;

    // Single vector destination: the architectural mask register.
    setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]);
    _numTypedDestRegs[VecRegClass]++;
    // Sources are the internal scratch registers filled by earlier microops.
    for (uint8_t i = 0; i < _numSrcs; i++) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]);
    }
}
Fault
VMaskMergeMicroInst::execute(ExecContext* xc,
    trace::InstRecord* traceData) const
{
    // Destination register, viewed as raw mask bytes.
    vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
    auto Vd = tmp_d0.as<uint8_t>();

    // Query VLENB directly through the const PC-state reference. The
    // previous code cloned the PCStateBase just to call vlenb() and never
    // freed the clone, leaking one PCStateBase per execution.
    const uint32_t vlenb = xc->tcBase()->pcState().as<PCState>().vlenb();
    const uint32_t elems_per_vreg = vlenb / elemSize;
    size_t bit_cnt = elems_per_vreg;

    vreg_t tmp_s;
    xc->getRegOperand(this, 0, &tmp_s);
    auto s = tmp_s.as<uint8_t>();
    // cp the first result and tail
    memcpy(Vd, s, vlenb);

    for (uint8_t i = 1; i < this->_numSrcRegs; i++) {
        xc->getRegOperand(this, i, &tmp_s);
        s = tmp_s.as<uint8_t>();
        if (elems_per_vreg < 8) {
            // Each source contributes less than one byte of mask bits:
            // clear the destination's slot, then OR in the source's bits.
            const uint32_t m = (1 << elems_per_vreg) - 1;
            const uint32_t mask = m << (i * elems_per_vreg % 8);
            // clr & ext bits
            Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask;
            Vd[bit_cnt/8] |= s[bit_cnt/8] & mask;
            bit_cnt += elems_per_vreg;
        } else {
            // Each source contributes whole bytes: copy its byte span.
            const uint32_t byte_offset = elems_per_vreg / 8;
            memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset);
        }
    }

    if (traceData)
        traceData->setData(vecRegClass, &tmp_d0);
    return NoFault;
}
// Renders "<mnemonic> vD, srcs..., offset:<elements-per-vreg>".
std::string
VMaskMergeMicroInst::generateDisassembly(Addr pc,
    const loader::SymbolTable *symtab) const
{
    std::stringstream out;
    out << mnemonic << ' ' << registerName(destRegIdx(0));
    for (uint8_t src = 0; src < this->_numSrcRegs; src++) {
        out << ", " << registerName(srcRegIdx(src));
    }
    // vlen is in bits; convert to bytes before dividing by the element size.
    const unsigned bytes_per_vreg = vlen >> 3;
    out << ", offset:" << bytes_per_vreg / elemSize;
    return out.str();
}
// Publishes the saturation flag: writes VXSAT and mirrors it into bit 0
// of VCSR, leaving the other VCSR bits untouched.
Fault
VxsatMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const
{
    const auto sat = *vxsat;
    xc->setMiscReg(MISCREG_VXSAT, sat);
    const auto old_vcsr = xc->readMiscReg(MISCREG_VCSR);
    xc->setMiscReg(MISCREG_VCSR, (old_vcsr & ~1) | sat);
    return NoFault;
}
// Renders "<mnemonic> VXSAT, 0x1" or "<mnemonic> VXSAT, 0x0".
std::string
VxsatMicroInst::generateDisassembly(Addr pc,
    const loader::SymbolTable *symtab) const
{
    std::stringstream out;
    const char *flag = *vxsat ? "0x1" : "0x0";
    out << mnemonic << ' ' << "VXSAT" << ", " << flag;
    return out.str();
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -34,7 +34,6 @@
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/isa.hh"
#include "arch/riscv/regs/misc.hh"
#include "arch/riscv/regs/vector.hh"
#include "arch/riscv/utility.hh"
#include "cpu/exec_context.hh"
#include "cpu/static_inst.hh"
@@ -539,7 +538,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst
Addr pc, const loader::SymbolTable *symtab) const override;
};
template<typename ElemType>
class VMaskMergeMicroInst : public VectorArithMicroInst
{
private:
@@ -548,75 +547,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst
public:
uint32_t vlen;
size_t elemSize;
VMaskMergeMicroInst(ExtMachInst extMachInst,
uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen)
: VectorArithMicroInst("vmask_mv_micro", extMachInst,
VectorIntegerArithOp, 0, 0),
vlen(_vlen)
{
setRegIdxArrays(
reinterpret_cast<RegIdArrayPtr>(
&std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
reinterpret_cast<RegIdArrayPtr>(
&std::remove_pointer_t<decltype(this)>::destRegIdxArr));
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]);
_numTypedDestRegs[VecRegClass]++;
for (uint8_t i=0; i<_numSrcs; i++) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]);
}
}
Fault
execute(ExecContext* xc, trace::InstRecord* traceData) const override
{
vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
PCStateBase *pc_ptr = xc->tcBase()->pcState().clone();
auto Vd = tmp_d0.as<uint8_t>();
uint32_t vlenb = pc_ptr->as<PCState>().vlenb();
const uint32_t elems_per_vreg = vlenb / sizeof(ElemType);
size_t bit_cnt = elems_per_vreg;
vreg_t tmp_s;
xc->getRegOperand(this, 0, &tmp_s);
auto s = tmp_s.as<uint8_t>();
// cp the first result and tail
memcpy(Vd, s, vlenb);
for (uint8_t i = 1; i < this->_numSrcRegs; i++) {
xc->getRegOperand(this, i, &tmp_s);
s = tmp_s.as<uint8_t>();
if (elems_per_vreg < 8) {
const uint32_t m = (1 << elems_per_vreg) - 1;
const uint32_t mask = m << (i * elems_per_vreg % 8);
// clr & ext bits
Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask;
Vd[bit_cnt/8] |= s[bit_cnt/8] & mask;
bit_cnt += elems_per_vreg;
} else {
const uint32_t byte_offset = elems_per_vreg / 8;
memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset);
}
}
if (traceData)
traceData->setData(vecRegClass, &tmp_d0);
return NoFault;
}
std::string
generateDisassembly(Addr pc, const loader::SymbolTable *symtab)
const override
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0));
for (uint8_t i = 0; i < this->_numSrcRegs; i++) {
ss << ", " << registerName(srcRegIdx(i));
}
unsigned vlenb = vlen >> 3;
ss << ", offset:" << vlenb / sizeof(ElemType);
return ss.str();
}
uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen, size_t _elemSize);
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(Addr,
const loader::SymbolTable *) const override;
};
class VxsatMicroInst : public VectorArithMicroInst
@@ -630,21 +566,9 @@ class VxsatMicroInst : public VectorArithMicroInst
{
vxsat = Vxsat;
}
Fault
execute(ExecContext* xc, trace::InstRecord* traceData) const override
{
xc->setMiscReg(MISCREG_VXSAT,*vxsat);
auto vcsr = xc->readMiscReg(MISCREG_VCSR);
xc->setMiscReg(MISCREG_VCSR, ((vcsr&~1)|*vxsat));
return NoFault;
}
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab)
const override
{
std::stringstream ss;
ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? "0x1" : "0x0");
return ss.str();
}
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(Addr, const loader::SymbolTable *)
const override;
};
} // namespace RiscvISA

File diff suppressed because it is too large Load Diff

View File

@@ -34,6 +34,14 @@ def setVlen():
def setVlenb():
    """Return the C++ snippet that declares the local vlenb variable."""
    snippet = "uint32_t vlenb = VlenbBits;\n"
    return snippet
def declareVMemTemplate(class_name):
    """Return explicit template instantiations of ``class_name`` for every
    supported element width, so the template definitions can live in a
    .cc file instead of the header."""
    elem_types = ("uint8_t", "uint16_t", "uint32_t", "uint64_t")
    body = "".join(
        f"template class {class_name}<{elem}>;\n" for elem in elem_types
    )
    return "\n" + body
def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
inst_flags, base_class, postacc_code='',
declare_template_base=VMemMacroDeclare,
@@ -47,22 +55,20 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
iop = InstObjParams(name, Name, base_class,
{'ea_code': ea_code,
'memacc_code': memacc_code,
'postacc_code': postacc_code },
'postacc_code': postacc_code,
'declare_vmem_template': declareVMemTemplate(Name)},
inst_flags)
constructTemplate = eval(exec_template_base + 'Constructor')
header_output = declare_template_base.subst(iop)
decoder_output = ''
if declare_template_base is not VMemTemplateMacroDeclare:
decoder_output += constructTemplate.subst(iop)
else:
header_output += constructTemplate.subst(iop)
decoder_output = constructTemplate.subst(iop)
decode_block = decode_template.subst(iop)
exec_output = ''
if not is_macroop:
return (header_output, decoder_output, decode_block, exec_output)
micro_class_name = exec_template_base + 'MicroInst'
microiop = InstObjParams(name + '_micro',
Name + 'Micro',
exec_template_base + 'MicroInst',
@@ -70,7 +76,8 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
'memacc_code': memacc_code,
'postacc_code': postacc_code,
'set_vlenb': setVlenb(),
'set_vlen': setVlen()},
'set_vlen': setVlen(),
'declare_vmem_template': declareVMemTemplate(Name + 'Micro')},
inst_flags)
if mem_flags:
@@ -79,17 +86,16 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
microiop.constructor += s
microDeclTemplate = eval(exec_template_base + 'Micro' + 'Declare')
microConsTemplate = eval(exec_template_base + 'Micro' + 'Constructor')
microExecTemplate = eval(exec_template_base + 'Micro' + 'Execute')
microInitTemplate = eval(exec_template_base + 'Micro' + 'InitiateAcc')
microCompTemplate = eval(exec_template_base + 'Micro' + 'CompleteAcc')
header_output = microDeclTemplate.subst(microiop) + header_output
decoder_output = microConsTemplate.subst(microiop) + decoder_output
micro_exec_output = (microExecTemplate.subst(microiop) +
microInitTemplate.subst(microiop) +
microCompTemplate.subst(microiop))
if declare_template_base is not VMemTemplateMacroDeclare:
exec_output += micro_exec_output
else:
header_output += micro_exec_output
exec_output += micro_exec_output
return (header_output, decoder_output, decode_block, exec_output)

View File

@@ -46,8 +46,6 @@ output header {{
#include <softfloat.h>
#include <specialize.h>
#include "arch/generic/memhelpers.hh"
#include "arch/riscv/decoder.hh"
#include "arch/riscv/insts/amo.hh"
#include "arch/riscv/insts/bs.hh"
#include "arch/riscv/insts/compressed.hh"

View File

@@ -107,6 +107,9 @@ template<typename ElemType>
this->microops.front()->setFirstMicroop();
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorIntMicroDeclare {{
@@ -145,6 +148,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorIntMicroExecute {{
@@ -182,6 +187,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorIntExtMacroDeclare {{
@@ -193,14 +200,7 @@ private:
public:
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen);
std::string generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const override
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
<< registerName(srcRegIdx(0));
if (machInst.vm == 0) ss << ", v0.t";
return ss.str();
}
const loader::SymbolTable *symtab) const override;
};
}};
@@ -219,14 +219,7 @@ public:
uint8_t _microIdx);
Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override;
std::string generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const override
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
<< registerName(srcRegIdx(0));
if (machInst.vm == 0) ss << ", v0.t";
return ss.str();
}
const loader::SymbolTable *symtab) const override;
};
}};
@@ -303,6 +296,38 @@ Fault
return NoFault;
}
template <typename ElemType>
std::string
%(class_name)s<ElemType>::generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
<< registerName(srcRegIdx(0));
if (machInst.vm == 0) ss << ", v0.t";
return ss.str();
}
%(declare_varith_template)s;
}};
def template VectorIntExtMacroExecute {{
template <typename ElemType>
std::string
%(class_name)s<ElemType>::generateDisassembly(Addr pc,
const loader::SymbolTable *symtab) const
{
std::stringstream ss;
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "
<< registerName(srcRegIdx(0));
if (machInst.vm == 0) ss << ", v0.t";
return ss.str();
}
%(declare_varith_template)s;
}};
def template VectorIntDecodeBlock {{
@@ -365,6 +390,8 @@ template<typename ElemType>
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorIntWideningMicroDeclare {{
@@ -402,6 +429,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorIntWideningMicroExecute {{
@@ -447,6 +476,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorIntNarrowingMicroExecute {{
@@ -493,6 +524,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorIntWideningDecodeBlock {{
@@ -546,6 +579,9 @@ template<typename ElemType>
this->microops.front()->setFirstMicroop();
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorFloatMicroDeclare {{
@@ -582,6 +618,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorFloatMicroExecute {{
@@ -620,6 +658,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorFloatDecodeBlock {{
@@ -725,6 +765,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorFloatNarrowingMicroExecute {{
@@ -772,6 +814,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorFloatWideningDecodeBlock {{
@@ -826,6 +870,8 @@ template<typename ElemType>
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template ViotaMicroDeclare {{
@@ -865,6 +911,8 @@ template<typename ElemType>
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2]);
}
%(declare_varith_template)s;
}};
def template ViotaMicroExecute {{
@@ -899,6 +947,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
@@ -915,6 +965,8 @@ template<typename ElemType>
%(set_vm_idx)s;
}
%(declare_varith_template)s;
}};
def template Vector1Vs1VdMaskExecute {{
@@ -948,6 +1000,8 @@ Fault
return NoFault;
};
%(declare_varith_template)s;
}};
def template Vector1Vs1RdMaskDeclare {{
@@ -978,6 +1032,8 @@ template<typename ElemType>
%(set_vm_idx)s;
}
%(declare_varith_template)s;
}};
def template Vector1Vs1RdMaskExecute {{
@@ -1010,6 +1066,8 @@ Fault
return NoFault;
};
%(declare_varith_template)s;
}};
def template VectorIntMaskMacroDeclare {{
@@ -1049,14 +1107,16 @@ template<typename ElemType>
this->microops.push_back(microop);
micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
}
microop = new VMaskMergeMicroInst<ElemType>(_machInst, _machInst.vd,
this->microops.size(), _vlen);
microop = new VMaskMergeMicroInst(_machInst, _machInst.vd,
this->microops.size(), _vlen, sizeof(ElemType));
this->microops.push_back(microop);
this->microops.front()->setFirstMicroop();
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorIntMaskMicroDeclare {{
@@ -1095,6 +1155,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorIntMaskMicroExecute {{
@@ -1133,6 +1195,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorFloatMaskMacroDeclare {{
@@ -1172,14 +1236,16 @@ template<typename ElemType>
this->microops.push_back(microop);
micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax);
}
microop = new VMaskMergeMicroInst<ElemType>(_machInst, _machInst.vd,
this->microops.size(), _vlen);
microop = new VMaskMergeMicroInst(_machInst, _machInst.vd,
this->microops.size(), _vlen, sizeof(ElemType));
this->microops.push_back(microop);
this->microops.front()->setFirstMicroop();
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorFloatMaskMicroDeclare {{
@@ -1217,6 +1283,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorFloatMaskMicroExecute {{
@@ -1255,6 +1323,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VMvWholeMacroDeclare {{
@@ -1381,6 +1451,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorMaskExecute {{
@@ -1415,6 +1487,8 @@ Fault
return NoFault;
};
%(declare_varith_template)s;
}};
def template VectorMaskDecodeBlock {{
@@ -1449,6 +1523,8 @@ template<typename ElemType>
%(set_vm_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorIntNonSplitExecute {{
@@ -1481,6 +1557,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorFloatNonSplitExecute {{
@@ -1513,6 +1591,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorFloatNonSplitDecodeBlock {{
@@ -1578,6 +1658,8 @@ template<typename ElemType>
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorReduceMicroDeclare {{
@@ -1615,6 +1697,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorReduceIntMicroExecute {{
@@ -1664,6 +1748,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorReduceFloatMicroExecute {{
@@ -1715,6 +1801,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorReduceFloatWideningMicroExecute {{
@@ -1765,6 +1853,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorGatherMacroDeclare {{
@@ -1824,6 +1914,8 @@ template<typename ElemType, typename IndexType>
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorGatherMicroDeclare {{
@@ -1873,6 +1965,8 @@ template<typename ElemType, typename IndexType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorGatherMicroExecute {{
@@ -1930,6 +2024,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorGatherDecodeBlock {{
@@ -2003,6 +2099,9 @@ template<typename ElemType>
this->microops.front()->setFirstMicroop();
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorIntVxsatMicroDeclare {{
@@ -2041,6 +2140,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorReduceIntWideningMicroExecute {{
@@ -2094,6 +2195,8 @@ Fault
return NoFault;
}
%(declare_varith_template)s;
}};
def template VectorSlideMacroDeclare {{
@@ -2142,6 +2245,8 @@ template<typename ElemType>
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorSlideDownMacroConstructor {{
@@ -2177,6 +2282,8 @@ template<typename ElemType>
this->microops.back()->setLastMicroop();
}
%(declare_varith_template)s;
}};
def template VectorSlideMicroDeclare {{
@@ -2215,6 +2322,8 @@ template<typename ElemType>
%(set_src_reg_idx)s;
}
%(declare_varith_template)s;
}};
def template VectorSlideMicroExecute {{
@@ -2254,6 +2363,8 @@ Fault
return NoFault;
};
%(declare_varith_template)s;
}};
def template VectorFloatSlideMicroExecute {{
@@ -2293,4 +2404,6 @@ Fault
return NoFault;
};
%(declare_varith_template)s;
}};

View File

@@ -96,22 +96,8 @@ private:
RegId srcRegIdxArr[3];
RegId destRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
}
%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
uint8_t _microIdx, uint32_t _vlen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
@@ -123,6 +109,27 @@ public:
}};
def template VleMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint8_t _microVl,
uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s(
"%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
}
}};
def template VleMicroExecute {{
Fault
@@ -293,21 +300,7 @@ private:
RegId destRegIdxArr[0];
public:
%(class_name)s(ExtMachInst _machInst,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsVector] = true;
this->flags[IsStore] = true;
}
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
@@ -318,6 +311,27 @@ public:
}};
def template VseMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s(
"%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsVector] = true;
this->flags[IsStore] = true;
}
}};
def template VseMicroExecute {{
Fault
@@ -518,18 +532,8 @@ private:
RegId srcRegIdxArr[2];
public:
%(class_name)s(ExtMachInst _machInst,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst,
%(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
this->flags[IsVector] = true;
this->flags[IsStore] = true;
}
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
Fault completeAcc(PacketPtr, ExecContext *,
@@ -539,6 +543,24 @@ public:
}};
def template VsWholeMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s(
"%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]);
this->flags[IsVector] = true;
this->flags[IsStore] = true;
}
}};
def template VsWholeMicroExecute {{
Fault
@@ -644,19 +666,8 @@ private:
RegId srcRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst,
uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s_micro", _machInst,
%(op_class)s, _microVl, _microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
this->flags[IsVector] = true;
this->flags[IsLoad] = true;
}
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
Fault completeAcc(PacketPtr, ExecContext *,
@@ -666,6 +677,25 @@ public:
}};
def template VlWholeMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst,
uint8_t _microVl, uint8_t _microIdx, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, _microVl,
_microIdx, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
this->flags[IsVector] = true;
this->flags[IsLoad] = true;
}
}};
def template VlWholeMicroExecute {{
Fault
@@ -803,24 +833,7 @@ private:
RegId destRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
uint32_t _microVl)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_regIdx, _microIdx, _microVl)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
// We treat agnostic as undistrubed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsLoad] = true;
}
uint32_t _microVl);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
@@ -831,6 +844,31 @@ public:
}};
def template VlStrideMicroConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
uint32_t _microVl)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_regIdx, _microIdx, _microVl)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
// We treat agnostic as undistrubed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsLoad] = true;
}
}};
def template VlStrideMicroExecute {{
Fault
@@ -1019,21 +1057,7 @@ private:
RegId destRegIdxArr[0];
public:
%(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
uint32_t _microVl)
: %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s,
_regIdx, _microIdx, _microVl)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsStore] = true;
}
uint32_t _microVl);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
@@ -1044,6 +1068,28 @@ public:
}};
def template VsStrideMicroConstructor {{
%(class_name)s::%(class_name)s(
ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx,
uint32_t _microVl)
: %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s,
_regIdx, _microIdx, _microVl)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsStore] = true;
}
}};
def template VsStrideMicroExecute {{
Fault
@@ -1185,6 +1231,8 @@ template<typename ElemType>
this->flags[IsVector] = true;
}
%(declare_vmem_template)s;
}};
def template VlIndexMicroDeclare {{
@@ -1199,24 +1247,7 @@ private:
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _vdRegIdx, uint8_t _vdElemIdx,
uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
// We treat agnostic as undistrubed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsLoad] = true;
}
uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
@@ -1227,6 +1258,34 @@ public:
}};
def template VlIndexMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(
ExtMachInst _machInst,uint8_t _vdRegIdx, uint8_t _vdElemIdx,
uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
// We treat agnostic as undistrubed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsLoad] = true;
}
%(declare_vmem_template)s;
}};
def template VlIndexMicroExecute {{
template<typename ElemType>
@@ -1364,6 +1423,8 @@ Fault
return NoFault;
}
%(declare_vmem_template)s;
}};
def template VsIndexConstructor {{
@@ -1410,6 +1471,8 @@ template<typename ElemType>
this->flags[IsVector] = true;
}
%(declare_vmem_template)s;
}};
def template VsIndexMicroDeclare {{
@@ -1424,22 +1487,7 @@ private:
public:
%(class_name)s(ExtMachInst _machInst,
uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx,
uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
// We treat agnostic as undistrubed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsStore] = true;
}
uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
@@ -1450,6 +1498,32 @@ public:
}};
def template VsIndexMicroConstructor {{
template<typename ElemType>
%(class_name)s<ElemType>::%(class_name)s(ExtMachInst _machInst,
uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx,
uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s,
_vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
// We treat agnostic as undistrubed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
this->flags[IsStore] = true;
}
%(declare_vmem_template)s;
}};
def template VsIndexMicroExecute {{
template<typename ElemType>
@@ -1548,6 +1622,8 @@ Fault
return NoFault;
}
%(declare_vmem_template)s;
}};
def template VMemBaseDecodeBlock {{