arch-riscv: adding vector unit-stride segment loads to RISC-V (#851)
This commit adds support for vector unit-stride segment load operations for RISC-V (vlseg<NF>e<X>). This implementation is based on two types of microops: - VlSeg microops that load data as it is organized in memory, in structs of several fields. - VectorDeIntrlv microops that properly deinterleave structs into destination registers. Gem5 issue: https://github.com/gem5/gem5/issues/382
This commit is contained in:
@@ -588,5 +588,102 @@ VlFFTrimVlMicroOp::generateDisassembly(Addr pc,
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
// Disassembles a vlseg<NF>e<X> macroop as
// "<mnemonic> vd, (rs1), <rs2>[, v0.t]".
std::string VlSegMacroInst::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    std::stringstream out;
    out << mnemonic << ' ' << registerName(destRegIdx(0));
    out << ", " << '(' << registerName(srcRegIdx(0)) << ')';
    out << ", " << registerName(srcRegIdx(1));
    // Masked variants carry the implicit v0 mask operand.
    if (!machInst.vm) {
        out << ", v0.t";
    }
    return out.str();
}
|
||||
|
||||
// Disassembles one vlseg load microop, including the old-destination
// source operand when it is actually carried by the microop.
std::string VlSegMicroInst::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    std::stringstream out;
    out << mnemonic << ' ' << registerName(destRegIdx(0));
    out << ", " << '(' << registerName(srcRegIdx(0)) << ')';
    out << ", " << registerName(srcRegIdx(1));
    // The old destination value is a source on every microop but the
    // first, and whenever mask/tail policy is undisturbed (vma/vta == 0).
    const bool readsOldDst = microIdx != 0 ||
        machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0;
    if (readsOldDst) {
        out << ", " << registerName(srcRegIdx(2));
    }
    if (!machInst.vm) {
        out << ", v0.t";
    }
    return out.str();
}
|
||||
|
||||
// Builds a deinterleave microop. It gathers the elements of one segment
// field out of the internal scratch registers previously filled (in raw
// memory order) by the VlSeg load microops, and packs them contiguously
// into a single destination vector register.
VlSegDeIntrlvMicroInst::VlSegDeIntrlvMicroInst(ExtMachInst extMachInst, uint32_t _micro_vl,
        uint32_t _dstReg, uint32_t _numSrcs,
        uint32_t _microIdx, uint32_t _numMicroops,
        uint32_t _field, uint32_t _vlen, uint32_t _sizeOfElement)
    : VectorArithMicroInst("vlseg_deintrlv_micro", extMachInst,
                           VectorIntegerArithOp, 0, 0),
      vlen(_vlen)
{
    setRegIdxArrays(
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::destRegIdxArr));

    _numSrcRegs = 0;
    _numDestRegs = 0;
    numSrcs = _numSrcs;
    numMicroops = _numMicroops;
    field =_field;
    sizeOfElement = _sizeOfElement;
    microIdx = _microIdx;
    micro_vl = _micro_vl;

    // Single destination: the architectural register chosen by the
    // macroop constructor for this (field, microop) pair.
    setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]);
    _numTypedDestRegs[VecRegClass]++;
    // Sources: a contiguous group of _numSrcs internal scratch registers;
    // each microop reads its own group, offset by microIdx.
    for (uint32_t i=0; i < _numSrcs; i++) {
        uint32_t index = VecMemInternalReg0 + i + (microIdx * _numSrcs);
        setSrcRegIdx(_numSrcRegs++, vecRegClass[index]);
    }
}
|
||||
|
||||
Fault
VlSegDeIntrlvMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const
{
    // Destination: the architectural vector register receiving this
    // field's elements.
    vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
    auto Vd = tmp_d0.as<uint8_t>();
    // Number of elements held in each source scratch register.
    const uint32_t elems_per_vreg = micro_vl;
    vreg_t tmp_s;
    auto s = tmp_s.as<uint8_t>();
    uint32_t elem = 0;      // next free element slot in the destination
    uint32_t index = field; // global element index across all sources;
                            // starts at `field` and strides by numSrcs,
                            // selecting only this field's elements
    for (uint32_t i = 0; i < numSrcs; i++) {
        xc->getRegOperand(this, i, &tmp_s);
        s = tmp_s.as<uint8_t>();
        // Copy every numSrcs-th element that falls inside source
        // register i into the next destination slot.
        while(index < (i + 1) * elems_per_vreg)
        {
            memcpy(Vd + (elem * sizeOfElement),
                s + ((index % elems_per_vreg) * sizeOfElement),
                sizeOfElement);
            index += numSrcs;
            elem++;
        }
    }
    if (traceData)
        traceData->setData(vecRegClass, &tmp_d0);
    return NoFault;
}
|
||||
|
||||
std::string
|
||||
VlSegDeIntrlvMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab)
|
||||
const
|
||||
{
|
||||
std::stringstream ss;
|
||||
ss << mnemonic << ' ' << registerName(destRegIdx(0));
|
||||
for (uint8_t i = 0; i < this->_numSrcRegs; i++) {
|
||||
ss << ", " << registerName(srcRegIdx(i));
|
||||
}
|
||||
ss << ", field: " << field;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
} // namespace RiscvISA
|
||||
} // namespace gem5
|
||||
|
||||
@@ -596,6 +596,65 @@ class VlFFTrimVlMicroOp : public VectorMicroInst
|
||||
const override;
|
||||
};
|
||||
|
||||
// Macroop for vector unit-stride segment loads (vlseg<NF>e<X>.v).
// Expands into VlSeg load microops (which read memory in its natural,
// field-interleaved order into scratch registers) followed by
// VlSegDeIntrlv microops that move each field to its destination
// registers.
class VlSegMacroInst : public VectorMemMacroInst
{
  protected:
    VlSegMacroInst(const char* mnem, ExtMachInst _machInst,
                   OpClass __opClass, uint32_t _vlen)
        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
    {}

    std::string generateDisassembly(
            Addr pc, const loader::SymbolTable *symtab) const override;
};
|
||||
|
||||
// Base class for the generated vlseg load microops. Each microop loads
// one scratch-register-sized chunk of segment memory as-is (still
// interleaved); deinterleaving happens later in VlSegDeIntrlvMicroInst.
class VlSegMicroInst : public VectorMicroInst
{
  protected:
    Request::Flags memAccessFlags;
    uint8_t regIdx;

    // NOTE(review): _numMicroops, _field and _numFields are accepted but
    // not stored here — the generated subclass (VlSegMicroDeclare
    // template) keeps its own copies. Confirm this duplication is
    // intentional.
    VlSegMicroInst(const char *mnem, ExtMachInst _machInst,
                   OpClass __opClass, uint32_t _microVl,
                   uint32_t _microIdx, uint32_t _numMicroops,
                   uint32_t _field, uint32_t _numFields,
                   uint32_t _vlen)
        : VectorMicroInst(mnem, _machInst, __opClass, _microVl,
                          _microIdx, _vlen)
    {
        this->flags[IsLoad] = true;
    }

    std::string generateDisassembly(
            Addr pc, const loader::SymbolTable *symtab) const override;
};
|
||||
|
||||
// Microop that deinterleaves one segment field out of the scratch
// registers filled by the VlSeg load microops into an architectural
// destination register.
class VlSegDeIntrlvMicroInst : public VectorArithMicroInst
{
  private:
    RegId srcRegIdxArr[NumVecInternalRegs];
    RegId destRegIdxArr[1];
    uint32_t numSrcs;        // number of scratch sources (NFIELDS at construction)
    uint32_t numMicroops;    // deinterleave microops per field
    uint32_t field;          // segment field index this microop extracts
    uint32_t sizeOfElement;  // element size in bytes (EEW / 8)
    uint32_t micro_vl;       // elements handled per scratch register

  public:
    uint32_t vlen;

    VlSegDeIntrlvMicroInst(ExtMachInst extMachInst, uint32_t _micro_vl,
                           uint32_t _dstReg, uint32_t _numSrcs,
                           uint32_t _microIdx, uint32_t _numMicroops,
                           uint32_t _field, uint32_t _vlen,
                           uint32_t _sizeOfElement);

    Fault execute(ExecContext *, trace::InstRecord *) const override;

    std::string generateDisassembly(Addr,
            const loader::SymbolTable *) const override;
};
|
||||
|
||||
} // namespace RiscvISA
|
||||
} // namespace gem5
|
||||
|
||||
|
||||
@@ -599,6 +599,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
|
||||
0x0: decode MOP {
|
||||
0x0: decode LUMOP {
|
||||
0x00: decode NF {
|
||||
0x00: VleOp::vle8_v({{
|
||||
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||
i < this->microVl) {
|
||||
@@ -607,6 +608,65 @@ decode QUADRANT default Unknown::unknown() {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideLoadOp);
|
||||
format VlSegOp {
|
||||
0x01: vlseg2e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x02: vlseg3e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x03: vlseg4e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x04: vlseg5e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x05: vlseg6e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x06: vlseg7e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x07: vlseg8e8_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||
} else {
|
||||
Vd_ub[i] = Vs2_ub[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
}
|
||||
}
|
||||
0x08: decode NF {
|
||||
format VlWholeOp {
|
||||
0x0: vl1re8_v({{
|
||||
@@ -651,6 +711,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}
|
||||
0x5: decode MOP {
|
||||
0x0: decode LUMOP {
|
||||
0x00: decode NF {
|
||||
0x00: VleOp::vle16_v({{
|
||||
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||
i < this->microVl) {
|
||||
@@ -659,6 +720,65 @@ decode QUADRANT default Unknown::unknown() {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideLoadOp);
|
||||
format VlSegOp {
|
||||
0x01: vlseg2e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x02: vlseg3e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x03: vlseg4e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x04: vlseg5e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x05: vlseg6e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x06: vlseg7e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x07: vlseg8e16_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||
} else {
|
||||
Vd_uh[i] = Vs2_uh[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
}
|
||||
}
|
||||
0x08: decode NF {
|
||||
format VlWholeOp {
|
||||
0x0: vl1re16_v({{
|
||||
@@ -700,6 +820,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}
|
||||
0x6: decode MOP {
|
||||
0x0: decode LUMOP {
|
||||
0x00: decode NF {
|
||||
0x00: VleOp::vle32_v({{
|
||||
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||
i < this->microVl) {
|
||||
@@ -708,6 +829,65 @@ decode QUADRANT default Unknown::unknown() {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideLoadOp);
|
||||
format VlSegOp {
|
||||
0x01: vlseg2e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x02: vlseg3e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x03: vlseg4e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x04: vlseg5e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x05: vlseg6e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x06: vlseg7e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x07: vlseg8e32_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
|
||||
i < this->microVl) {
|
||||
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||
} else {
|
||||
Vd_uw[i] = Vs2_uw[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
}
|
||||
}
|
||||
0x08: decode NF {
|
||||
format VlWholeOp {
|
||||
0x0: vl1re32_v({{
|
||||
@@ -749,6 +929,7 @@ decode QUADRANT default Unknown::unknown() {
|
||||
}
|
||||
0x7: decode MOP {
|
||||
0x0: decode LUMOP {
|
||||
0x00: decode NF {
|
||||
0x00: VleOp::vle64_v({{
|
||||
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||
i < this->microVl) {
|
||||
@@ -757,6 +938,65 @@ decode QUADRANT default Unknown::unknown() {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideLoadOp);
|
||||
format VlSegOp {
|
||||
0x01: vlseg2e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x02: vlseg3e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x03: vlseg4e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x04: vlseg5e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x05: vlseg6e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x06: vlseg7e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
0x07: vlseg8e64_v({{
|
||||
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
|
||||
i < this->microVl) {
|
||||
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||
} else {
|
||||
Vd_ud[i] = Vs2_ud[i];
|
||||
}
|
||||
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
||||
}
|
||||
}
|
||||
0x08: decode NF {
|
||||
format VlWholeOp {
|
||||
0x0: vl1re64_v({{
|
||||
|
||||
@@ -245,3 +245,16 @@ def format VsIndexOp(
|
||||
decode_template=VMemSplitTemplateDecodeBlock
|
||||
)
|
||||
}};
|
||||
|
||||
# Format for vector unit-stride segment loads (vlseg<NF>e<X>.v).
# memacc_code runs once per element slot; ea_code computes the base
# address of the memory chunk one (field, microIdx) load microop reads.
def format VlSegOp(
    memacc_code,
    ea_code={{
        // Memory is consumed as consecutive mem_size-byte chunks; this
        // microop's chunk is number (microIdx + field * numMicroops).
        EA = Rs1 + mem_size * (microIdx + (field * numMicroops));
    }},
    mem_flags=[],
    inst_flags=[]
) {{
    (header_output, decoder_output, decode_block, exec_output) = \
        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
                 'VlSegMacroInst', exec_template_base='VlSeg')
}};
|
||||
|
||||
@@ -1636,6 +1636,238 @@ Fault
|
||||
|
||||
}};
|
||||
|
||||
def template VlSegConstructor {{

// Macroop constructor: expands a vlseg<NF>e<X> into load microops
// followed by deinterleave microops.
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;

    // Elements one vector register holds at this EEW, and the number of
    // load microops needed per field to cover vl elements.
    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
    const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax));
    int32_t remaining_vl = this->vl;
    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
    size_t NFIELDS = machInst.nf + 1;
    StaticInstPtr microop;
    uint32_t size_per_elem = width_EEW(_machInst.width) / 8;

    if (micro_vl == 0) {
        // vl == 0: nothing to load, emit a single nop microop.
        microop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(microop);
    } else {
        // Phase 1: load microops. They read memory in its natural
        // (field-interleaved) order into internal scratch registers.
        for (int f = 0; f < NFIELDS; ++f) {
            remaining_vl = this->vl;
            micro_vl = std::min(remaining_vl, micro_vlmax);
            for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
                microop = new %(class_name)sMicro(_machInst, micro_vl, i, num_microops, f, NFIELDS, vlen);
                microop->setDelayedCommit();
                microop->setFlag(IsLoad);
                this->microops.push_back(microop);
                micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
            }
        }
        // Phase 2: deinterleave microops. Each gathers one field's
        // elements out of the scratch registers into architectural
        // destination register (vd + i + f * num_microops).
        for (int f = 0; f < NFIELDS; ++f) {
            remaining_vl = this->vl;
            micro_vl = std::min(remaining_vl, micro_vlmax);
            for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
                microop = new VlSegDeIntrlvMicroInst(_machInst, micro_vl, _machInst.vd + i + (f * num_microops),
                                                     NFIELDS, i, num_microops, f, vlen, size_per_elem);
                this->microops.push_back(microop);
                micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
            }
        }
    }

    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}
}};
|
||||
|
||||
def template VlSegMicroDeclare {{

// Generated class for one vlseg load microop: loads one register-sized
// chunk of segment memory into an internal scratch register.
class %(class_name)s : public %(base_class)s
{
  private:
    // rs1, rs2, vd, vm
    RegId srcRegIdxArr[4];
    RegId destRegIdxArr[1];
    uint32_t field;       // field index this microop's chunk belongs to
    uint32_t numFields;   // NFIELDS = nf + 1
    uint32_t numMicroops; // load microops per field
  public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field, uint32_t _numFields, uint32_t _vlen);

    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;
    using %(base_class)s::generateDisassembly;
};

}};
|
||||
|
||||
def template VlSegMicroConstructor {{

%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field, uint32_t _numFields, uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx , _numMicroops, _field, _numFields, _vlen)
{
    %(set_reg_idx_arr)s;

    _numSrcRegs = 0;
    _numDestRegs = 0;
    field = _field;
    numFields = _numFields;
    numMicroops = _numMicroops;
    // Destination: an internal scratch register, one per
    // (field, microIdx) pair; the deinterleave microops read it later.
    setDestRegIdx(_numDestRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
                                              (field * numMicroops)]);
    _numTypedDestRegs[VecRegClass]++;
    // Sources: base address register, the scratch register's old value,
    // and v0 when the operation is masked.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
                                            (field * numMicroops)]);
    if (!_machInst.vm) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
}

}};
|
||||
|
||||
def template VlSegMicroExecute {{

// Atomic/functional-mode execution: read the chunk and write back per
// element under mask/bounds control.
Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;
    // Bytes this microop reads: EEW/8 bytes per element, microVl elements.
    uint32_t mem_size = width_EEW(machInst.width) / 8 * microVl;

    %(op_decl)s;
    %(op_rd)s;
    %(set_vlen)s;
    %(ea_code)s;

    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);

    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
                "RVV is disabled or VPU is off", machInst);
    }

    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    const int64_t vlmul = vtype_vlmul(machInst.vtype8);

    // RVV requires EMUL * NFIELDS <= 8 for segment instructions.
    panic_if((pow(2, vlmul) * this->numFields) > 8, "LMUL value is illegal for vlseg inst");

    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // Fetch the mask register only when the instruction is masked.
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    const std::vector<bool> byte_enable(mem_size, true);
    Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
                              byte_enable);

    if (fault != NoFault)
        return fault;

    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
    const size_t micro_elems = vlen / width_EEW(machInst.width);

    size_t ei;

    // Walk every element slot of the scratch register; memacc_code
    // applies the per-element mask and microVl bounds checks.
    for (size_t i = 0; i < micro_elems; i++) {
        ei = i + micro_vlmax * microIdx;
        %(memacc_code)s;
    }

    %(op_wb)s;
    return fault;
}

}};
|
||||
|
||||
def template VlSegMicroInitiateAcc {{

// Timing-mode issue: validate state, then start the memory read; the
// returned data is consumed in completeAcc.
Fault
%(class_name)s::initiateAcc(ExecContext* xc,
                            trace::InstRecord* traceData) const
{

    Addr EA;
    // Bytes to request: EEW/8 bytes per element, microVl elements.
    uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl;

    %(op_decl)s;
    %(op_rd)s;
    %(ea_code)s;

    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
                "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);

    const int64_t vlmul = vtype_vlmul(machInst.vtype8);

    // RVV requires EMUL * NFIELDS <= 8 for segment instructions.
    panic_if((pow(2, vlmul) * this->numFields) > 8, "LMUL value is illegal for vlseg inst");

    const std::vector<bool> byte_enable(mem_size, true);
    Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
                                  byte_enable);
    return fault;
}

}};
|
||||
|
||||
def template VlSegMicroCompleteAcc {{

// Timing-mode completion: copy the returned packet data into the Mem
// operand and write back per element under mask/bounds control.
Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
                            trace::InstRecord *traceData) const
{
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlen)s;

    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    // Fetch the mask register only when the instruction is masked.
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }

    memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());

    const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
    const size_t micro_elems = vlen / width_EEW(machInst.width);

    size_t ei;
    // Per-element writeback; memacc_code applies the mask and microVl
    // bounds checks.
    for (size_t i = 0; i < micro_elems; i++) {
        ei = i + micro_vlmax * microIdx;
        %(memacc_code)s;
    }

    %(op_wb)s;
    return NoFault;
}

}};
|
||||
|
||||
|
||||
def template VMemBaseDecodeBlock {{
    // Decode block: construct the macroop; `vlen` is supplied by the
    // decoder context.
    return new %(class_name)s(machInst, vlen);
}};
|
||||
|
||||
@@ -306,6 +306,17 @@ elem_mask(const T* vs, const int index)
|
||||
return (vs[idx] >> pos) & 1;
|
||||
}
|
||||
|
||||
// Returns the v0 mask bit governing segment element `elem` of a
// vlseg/vsseg instruction with `num_fields` fields: all fields of one
// struct element share a single mask bit, so the mask index is
// elem / num_fields.
//
// Fixes vs. previous version: the static_assert now precedes any use of
// T, and the pointless floating-point round trip (floor() applied to an
// already-truncating integer division) is removed — for the
// non-negative indices used here the result is identical.
template<typename T>
inline int
elem_mask_vlseg(const T* vs, const int elem, const int num_fields)
{
    static_assert(std::is_integral_v<T>);
    const int index = elem / num_fields;   // mask bit shared by the whole struct element
    const int idx = index / (sizeof(T) * 8); // which word of the mask register
    const int pos = index % (sizeof(T) * 8); // bit position inside that word
    return (vs[idx] >> pos) & 1;
}
|
||||
|
||||
template<typename FloatType, typename IntType = decltype(FloatType::v)> auto
|
||||
ftype(IntType a) -> FloatType
|
||||
{
|
||||
|
||||
@@ -112,6 +112,7 @@ class OpClass(Enum):
|
||||
"VectorWholeRegisterLoad",
|
||||
"VectorWholeRegisterStore",
|
||||
"VectorIntegerArith",
|
||||
"VectorUnitStrideSegmentedLoad",
|
||||
"VectorFloatArith",
|
||||
"VectorFloatConvert",
|
||||
"VectorIntegerReduce",
|
||||
|
||||
@@ -261,6 +261,7 @@ class MinorDefaultVecFU(MinorFU):
|
||||
"VectorIndexedLoad",
|
||||
"VectorIndexedStore",
|
||||
"VectorUnitStrideFaultOnlyFirstLoad",
|
||||
"VectorUnitStrideSegmentedLoad",
|
||||
"VectorWholeRegisterLoad",
|
||||
"VectorWholeRegisterStore",
|
||||
"VectorIntegerArith",
|
||||
|
||||
@@ -133,6 +133,7 @@ static const OpClass VectorIntegerReduceOp = enums::VectorIntegerReduce;
|
||||
static const OpClass VectorFloatReduceOp = enums::VectorFloatReduce;
|
||||
static const OpClass VectorMiscOp = enums::VectorMisc;
|
||||
static const OpClass VectorIntegerExtensionOp = enums::VectorIntegerExtension;
|
||||
static const OpClass VectorUnitStrideSegmentedLoadOp = enums::VectorUnitStrideSegmentedLoad;
|
||||
static const OpClass VectorConfigOp = enums::VectorConfig;
|
||||
static const OpClass Num_OpClasses = enums::Num_OpClass;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user