arch-riscv: adding vector unit-stride segment stores to RISC-V (#913)

This commit adds support for vector unit-stride segment store operations
for RISC-V (vssegXeXX). This implementation is based on two types of
microops:
- VsSegIntrlv microops that properly interleave source registers into
structs.
- VsSeg microops that store data in memory as contiguous structs of
several fields.

Change-Id: Id80dd4e781743a60eb76c18b6a28061f8e9f723d

Gem5 issue: https://github.com/gem5/gem5/issues/382
This commit is contained in:
Ivan Fernandez
2024-03-22 23:45:58 +01:00
committed by GitHub
parent 7d62da6d10
commit 1e743fd85a
8 changed files with 555 additions and 42 deletions

View File

@@ -501,7 +501,6 @@ VxsatMicroInst::generateDisassembly(Addr pc,
return ss.str();
}
VlFFTrimVlMicroOp::VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl,
uint32_t _microIdx, uint32_t _vlen, std::vector<StaticInstPtr>& _microops)
: VectorMicroInst("vlff_trimvl_v_micro", _machInst, VectorConfigOp,
@@ -685,5 +684,115 @@ VlSegDeIntrlvMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *
return ss.str();
}
std::string VsSegMacroInst::generateDisassembly(Addr pc,
    const loader::SymbolTable *symtab) const
{
    // Render as: <mnemonic> vd, (rs1), rs2[, v0.t]
    std::stringstream out;
    out << mnemonic << ' ' << registerName(destRegIdx(0));
    out << ", " << '(' << registerName(srcRegIdx(0)) << ')';
    out << ", " << registerName(srcRegIdx(1));
    // Masked form appends the standard v0.t suffix.
    if (!machInst.vm) {
        out << ", v0.t";
    }
    return out.str();
}
// Disassemble one segment-store memory microop:
// <mnemonic> vd, (rs1), rs2[, extra-src][, v0.t]
std::string VsSegMicroInst::generateDisassembly(Addr pc,
    const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " <<
        '(' << registerName(srcRegIdx(0)) << ')' <<
        ", "<< registerName(srcRegIdx(1));
    // NOTE(review): this condition (print an extra source when not the
    // first microop or when mask/tail policy is undisturbed) mirrors the
    // load-microop disassembly. Confirm srcRegIdx(2) is actually
    // registered for stores -- the VsSegMicro constructor template only
    // sets rs1, the interleaved internal vreg, and (optionally) v0.
    if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0)
        ss << ", " << registerName(srcRegIdx(2));
    if (!machInst.vm)
        ss << ", v0.t";
    return ss.str();
}
// Interleave microop for vector unit-stride segment stores. It reads the
// architectural vs3 register group and packs one slice of segment data
// into an internal scratch vector register; a matching VsSeg memory
// microop later writes that scratch register to memory.
//
//   _micro_vl      elements produced by this microop
//   _dstReg        base architectural register of the vs3 group
//   _numSrcs       number of segment fields (NFIELDS)
//   _microIdx      index of this microop within its field
//   _numMicroops   number of microops per field
//   _field         output chunk handled by this microop
//   _sizeOfElement element size in bytes (EEW/8)
VsSegIntrlvMicroInst::VsSegIntrlvMicroInst(ExtMachInst extMachInst,
    uint32_t _micro_vl, uint32_t _dstReg, uint32_t _numSrcs,
    uint32_t _microIdx, uint32_t _numMicroops,
    uint32_t _field, uint32_t _vlen, uint32_t _sizeOfElement)
    : VectorArithMicroInst("vsseg_reintrlv_micro", extMachInst,
        VectorIntegerArithOp, 0, 0),
    vlen(_vlen)
{
    setRegIdxArrays(
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::destRegIdxArr));
    _numSrcRegs = 0;
    _numDestRegs = 0;
    numSrcs = _numSrcs;
    numMicroops = _numMicroops;
    field =_field;
    sizeOfElement = _sizeOfElement;
    microIdx = _microIdx;
    micro_vl = _micro_vl;
    // Destination: internal scratch vreg, unique per (field, microIdx)
    // since field < numSrcs.
    setDestRegIdx(_numDestRegs++, vecRegClass[VecMemInternalReg0 + field +
        (_microIdx * numSrcs)]);
    _numTypedDestRegs[VecRegClass]++;
    // Sources: the microIdx-th register of each field's register group
    // (groups are numMicroops registers apart).
    for (uint8_t i=0; i<_numSrcs; i++) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[_dstReg + (i * numMicroops) +
            (microIdx)]);
    }
}
Fault
VsSegIntrlvMicroInst::execute(ExecContext* xc,
    trace::InstRecord* traceData) const
{
    // Gather this microop's slice of segment data from the source vregs
    // into the destination internal vreg, element by element, so the
    // subsequent store microop sees contiguous segment structs.
    const uint32_t elems_per_vreg = micro_vl;
    vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
    auto Vd = tmp_d0.as<uint8_t>();
    vreg_t tmp_s;
    // Start position in the flattened (element-interleaved) stream for
    // this output chunk: srcReg selects the field, indexs the element
    // within that field's register.
    uint32_t indexVd = 0;
    uint32_t srcReg = (field * elems_per_vreg) % numSrcs;
    uint32_t indexs = (field * elems_per_vreg) / numSrcs;
    while (indexVd < elems_per_vreg) {
        // (Fix: the original also read operand 0 into tmp_s before the
        // loop and assigned `s` twice; those values were never used --
        // every iteration re-reads the operand below.)
        xc->getRegOperand(this, srcReg, &tmp_s);
        auto s = tmp_s.as<uint8_t>();
        memcpy(Vd + (indexVd * sizeOfElement),
            s + (indexs * sizeOfElement),
            sizeOfElement);
        indexVd++;
        srcReg++;
        // Wrap to the next element once every field has contributed.
        if (srcReg >= numSrcs) {
            srcReg = 0;
            indexs++;
        }
    }
    if (traceData)
        traceData->setData(vecRegClass, &tmp_d0);
    return NoFault;
}
std::string
VsSegIntrlvMicroInst::generateDisassembly(Addr pc,
    const loader::SymbolTable *symtab) const
{
    // Render as: <mnemonic> vd, vs..., field: N
    std::stringstream out;
    out << mnemonic << ' ' << registerName(destRegIdx(0));
    for (uint8_t idx = 0; idx < this->_numSrcRegs; idx++)
        out << ", " << registerName(srcRegIdx(idx));
    out << ", field: " << field;
    return out.str();
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -655,6 +655,65 @@ class VlSegDeIntrlvMicroInst : public VectorArithMicroInst
const loader::SymbolTable *) const override;
};
// Macroop for vector unit-stride segment stores (vsseg<nf>e<eew>.v).
// Decomposed into VsSegIntrlv microops (register interleave) followed
// by VsSeg memory microops (struct-wise writes); see VsSegConstructor.
class VsSegMacroInst : public VectorMemMacroInst
{
  protected:
    VsSegMacroInst(const char* mnem, ExtMachInst _machInst,
                   OpClass __opClass, uint32_t _vlen)
        : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
    {}

    std::string generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const override;
};
// Memory microop that writes one internal (already interleaved) vector
// register's worth of segment data to memory.
class VsSegMicroInst : public VectorMicroInst
{
  protected:
    Request::Flags memAccessFlags;
    // NOTE(review): regIdx is never assigned in the visible code; it may
    // be set by generated subclasses or be dead -- confirm before use.
    uint8_t regIdx;

    // _numMicroops/_field/_numFields are not stored here; the generated
    // microop subclass keeps its own copies (see VsSegMicroConstructor).
    VsSegMicroInst(const char *mnem, ExtMachInst _machInst,
                   OpClass __opClass, uint32_t _microVl,
                   uint32_t _microIdx, uint32_t _numMicroops,
                   uint32_t _field, uint32_t _numFields,
                   uint32_t _vlen)
        : VectorMicroInst(mnem, _machInst, __opClass, _microVl,
                          _microIdx, _vlen)
    {
        this->flags[IsStore] = true;
    }

    std::string generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const override;
};
// Arithmetic microop that interleaves segment data from the vs3
// register group into an internal scratch vreg for a following store
// microop (packing layout is in the .cc implementation).
class VsSegIntrlvMicroInst : public VectorArithMicroInst
{
  private:
    RegId srcRegIdxArr[NumVecInternalRegs];
    RegId destRegIdxArr[1];
    uint32_t numSrcs;        // number of segment fields (NFIELDS)
    uint32_t numMicroops;    // microops per field
    uint32_t field;          // output chunk handled by this microop
    uint32_t sizeOfElement;  // element size in bytes (EEW/8)
    uint32_t micro_vl;       // elements produced by this microop

  public:
    uint32_t vlen;

    VsSegIntrlvMicroInst(ExtMachInst extMachInst, uint32_t _micro_vl,
        uint32_t _dstReg, uint32_t _numSrcs,
        uint32_t _microIdx, uint32_t _numMicroops,
        uint32_t _field, uint32_t _vlen,
        uint32_t _sizeOfElement);

    Fault execute(ExecContext *, trace::InstRecord *) const override;

    std::string generateDisassembly(Addr,
        const loader::SymbolTable *) const override;
};
} // namespace RiscvISA
} // namespace gem5

View File

@@ -610,7 +610,7 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -618,7 +618,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -626,7 +626,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -634,7 +634,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -642,7 +642,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -650,7 +650,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -658,7 +658,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
@@ -722,7 +722,7 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -730,7 +730,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -738,7 +738,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -746,7 +746,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -754,7 +754,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -762,7 +762,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -770,7 +770,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
@@ -831,7 +831,7 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -839,7 +839,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -847,7 +847,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -855,7 +855,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -863,7 +863,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -871,7 +871,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -879,7 +879,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
@@ -940,7 +940,7 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -948,7 +948,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -956,7 +956,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -964,7 +964,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -972,7 +972,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -980,7 +980,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -988,7 +988,7 @@ decode QUADRANT default Unknown::unknown() {
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
@@ -1363,9 +1363,34 @@ decode QUADRANT default Unknown::unknown() {
0x0: decode MOP {
0x0: decode SUMOP {
0x00: VseOp::vse8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideStoreOp);
0x00: decode NF {
0x00: VseOp::vse8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideStoreOp);
format VsSegOp {
0x01: vsseg2e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
format VsWholeOp {
0x8: decode NF {
0x0: vs1r_v({{
@@ -1402,9 +1427,34 @@ decode QUADRANT default Unknown::unknown() {
}
0x5: decode MOP {
0x0: decode SUMOP {
0x00: VseOp::vse16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideStoreOp);
0x00: decode NF {
0x00: VseOp::vse16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideStoreOp);
format VsSegOp {
0x01: vsseg2e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
}
0x1: VsIndexOp::vsuxei16_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
@@ -1422,9 +1472,34 @@ decode QUADRANT default Unknown::unknown() {
}
0x6: decode MOP {
0x0: decode SUMOP {
0x00: VseOp::vse32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideStoreOp);
0x00: decode NF {
0x00: VseOp::vse32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideStoreOp);
format VsSegOp {
0x01: vsseg2e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
}
0x1: VsIndexOp::vsuxei32_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
@@ -1442,9 +1517,34 @@ decode QUADRANT default Unknown::unknown() {
}
0x7: decode MOP {
0x0: decode SUMOP {
0x00: VseOp::vse64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideStoreOp);
0x00: decode NF {
0x00: VseOp::vse64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideStoreOp);
format VsSegOp {
0x01: vsseg2e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
}
0x1: VsIndexOp::vsuxei64_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];

View File

@@ -258,3 +258,16 @@ def format VlSegOp(
VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
'VlSegMacroInst', exec_template_base='VlSeg')
}};
// Format for vector unit-stride segment stores (vsseg<nf>e<eew>.v).
// Each memory microop writes one mem_size-byte chunk; the effective
// address is offset by the chunk's position in the overall struct
// stream: (microIdx + field * numMicroops) chunks past Rs1.
def format VsSegOp(
    memacc_code,
    ea_code={{
        EA = Rs1 + mem_size * (microIdx + (field * numMicroops));
    }},
    mem_flags=[],
    inst_flags=[]
) {{
    (header_output, decoder_output, decode_block, exec_output) = \
        VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
        'VsSegMacroInst', exec_template_base='VsSeg')
}};

View File

@@ -1867,6 +1867,235 @@ Fault
}};
def template VsSegConstructor {{
// Macroop constructor for unit-stride segment stores. Emits two
// microop sequences:
//   (1) one VsSegIntrlvMicroInst per (field, microop) that packs the
//       vs3 register group into internal scratch vregs, then
//   (2) one store microop per (field, microop) that writes each scratch
//       vreg to memory.
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
    %(set_reg_idx_arr)s;
    %(constructor)s;
    // Elements per full microop, and microops needed per field.
    const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
    const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax));
    int32_t remaining_vl = this->vl;
    int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
    size_t NFIELDS = machInst.nf + 1;
    StaticInstPtr microop;
    uint32_t size_per_elem = width_EEW(_machInst.width) / 8;
    if (micro_vl == 0) {
        // vl == 0: nothing to store; emit a single NOP microop.
        microop = new VectorNopMicroInst(_machInst);
        this->microops.push_back(microop);
    } else {
        // Phase 1: interleave microops.
        // NOTE(review): loop index is int vs size_t NFIELDS/uint32_t
        // num_microops (sign/unsigned compare) -- harmless for the
        // small values involved.
        for (int f = 0; f < NFIELDS; ++f) {
            remaining_vl = this->vl;
            micro_vl = std::min(remaining_vl, micro_vlmax);
            for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
                microop = new VsSegIntrlvMicroInst(_machInst, micro_vl,
                    _machInst.vs3, NFIELDS, i, num_microops, f, vlen,
                    size_per_elem);
                this->microops.push_back(microop);
                micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
            }
        }
        // Phase 2: memory microops, one per scratch vreg.
        for (int f = 0; f < NFIELDS; ++f) {
            remaining_vl = this->vl;
            micro_vl = std::min(remaining_vl, micro_vlmax);
            for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
                microop = new %(class_name)sMicro(_machInst, micro_vl, i,
                    num_microops, f, NFIELDS, vlen);
                microop->setDelayedCommit();
                microop->setFlag(IsStore);
                this->microops.push_back(microop);
                micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
            }
        }
    }
    this->microops.front()->setFlag(IsFirstMicroop);
    this->microops.back()->setFlag(IsLastMicroop);
    this->flags[IsVector] = true;
}
}};
def template VsSegMicroDeclare {{
// Declaration of the generated segment-store memory microop class.
class %(class_name)s : public %(base_class)s
{
  private:
    // rs1, rs2, vd, vm
    RegId srcRegIdxArr[4];
    RegId destRegIdxArr[1];
    uint32_t field;       // segment field this microop stores
    uint32_t numFields;   // NFIELDS (machInst.nf + 1)
    uint32_t numMicroops; // microops per field

  public:
    %(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
        uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field,
        uint32_t _numFields, uint32_t _vlen);

    Fault execute(ExecContext *, trace::InstRecord *) const override;
    Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
    Fault completeAcc(PacketPtr, ExecContext *,
                      trace::InstRecord *) const override;

    using %(base_class)s::generateDisassembly;
};
}};
def template VsSegMicroConstructor {{
// Constructor of the generated segment-store memory microop.
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl,
    uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field,
    uint32_t _numFields, uint32_t _vlen)
    : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl,
                     _microIdx, _numMicroops, _field, _numFields, _vlen)
{
    %(set_reg_idx_arr)s;
    _numSrcRegs = 0;
    _numDestRegs = 0;
    field = _field;
    numFields = _numFields;
    numMicroops = _numMicroops;
    // Base address register.
    setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
    // Interleaved scratch vreg produced by the VsSegIntrlv microops.
    // NOTE(review): index scheme here is microIdx + field*numMicroops,
    // while the interleave microop's destination uses
    // field + microIdx*numSrcs -- confirm the two enumerations are
    // intentionally cross-mapped.
    setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
        (field * numMicroops)]);
    // Mask register for the masked (vm=0) form.
    if (!_machInst.vm) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
    }
    this->flags[IsVector] = true;
    this->flags[IsStore] = true;
}
}};
def template VsSegMicroExecute {{
// Atomic-mode execute: computes the EA, builds a per-byte enable mask
// from v0 (masked elements are skipped), and writes mem_size bytes.
Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    Addr EA;
    const size_t eewb = width_EEW(machInst.width) / 8;  // bytes/element
    const size_t mem_size = eewb * microVl;
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
                "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    // v0 is read only when the instruction is masked; it is always the
    // last source operand (see VsSegMicroConstructor).
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }
    %(op_decl)s;
    %(op_rd)s;
    %(set_vlen)s;
    %(ea_code)s;
    // EMUL * NFIELDS must not exceed 8 per the RVV spec.
    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
    panic_if((pow(2, vlmul) * this->numFields) > 8,
             "LMUL value is illegal for vsseg inst");
    const size_t micro_vlmax = vlen / width_EEW(machInst.width);
    std::vector<bool> byte_enable(mem_size, false);
    size_t ei;
    for (size_t i = 0; i < microVl; i++) {
        ei = i + micro_vlmax * microIdx;
        // NOTE(review): mask index uses field * microVl here, whereas
        // the load decoder uses field * micro_elems (== micro_vlmax);
        // confirm this is correct for a short final microop.
        if (machInst.vm || elem_mask_vseg(v0, ei + (field * microVl),
                this->numFields)) {
            %(memacc_code)s;
            // Enable the eewb bytes of this active element.
            auto it = byte_enable.begin() + i * eewb;
            std::fill(it, it + eewb, true);
        }
    }
    Fault fault;
    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
        nullptr, byte_enable);
    return fault;
}
}};
def template VsSegMicroInitiateAcc {{
// Timing-mode initiate: mirrors execute() (same EA, mask and
// byte-enable construction) and issues the write request.
// NOTE(review): unlike execute(), %(set_vlen)s is not emitted here --
// confirm this asymmetry is intentional.
Fault
%(class_name)s::initiateAcc(ExecContext* xc,
    trace::InstRecord* traceData) const
{
    Addr EA;
    const size_t eewb = width_EEW(machInst.width) / 8;  // bytes/element
    const size_t mem_size = eewb * microVl;
    RiscvISA::vreg_t tmp_v0;
    uint8_t *v0;
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (!misa.rvv || status.vs == VPUStatus::OFF) {
        return std::make_shared<IllegalInstFault>(
                "RVV is disabled or VPU is off", machInst);
    }
    if (machInst.vill)
        return std::make_shared<IllegalInstFault>("VILL is set", machInst);
    // v0 is read only when masked; it is the last source operand.
    if(!machInst.vm) {
        xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
        v0 = tmp_v0.as<uint8_t>();
    }
    %(op_decl)s;
    %(op_rd)s;
    %(ea_code)s;
    // EMUL * NFIELDS must not exceed 8 per the RVV spec.
    const int64_t vlmul = vtype_vlmul(machInst.vtype8);
    panic_if((pow(2, vlmul) * this->numFields) > 8,
             "LMUL value is illegal for vsseg inst");
    const size_t micro_vlmax = vlen / width_EEW(machInst.width);
    std::vector<bool> byte_enable(mem_size, false);
    size_t ei;
    for (size_t i = 0; i < microVl; i++) {
        ei = i + micro_vlmax * microIdx;
        if (machInst.vm || elem_mask_vseg(v0, ei + (field * microVl),
                this->numFields)) {
            %(memacc_code)s;
            // Enable the eewb bytes of this active element.
            auto it = byte_enable.begin() + i * eewb;
            std::fill(it, it + eewb, true);
        }
    }
    Fault fault;
    fault = xc->writeMem(Mem.as<uint8_t>(), mem_size, EA, memAccessFlags,
        nullptr, byte_enable);
    return fault;
}
}};
def template VsSegMicroCompleteAcc {{
// Timing-mode completion: stores have no data to write back, so
// nothing to do.
Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
    trace::InstRecord *traceData) const
{
    return NoFault;
}
}};
def template VMemBaseDecodeBlock {{
return new %(class_name)s(machInst, vlen);

View File

@@ -308,7 +308,7 @@ elem_mask(const T* vs, const int index)
template<typename T>
inline int
elem_mask_vlseg(const T* vs, const int elem, const int num_fields)
elem_mask_vseg(const T* vs, const int elem, const int num_fields)
{
int index = floor(elem / num_fields);
static_assert(std::is_integral_v<T>);

View File

@@ -102,6 +102,7 @@ class OpClass(Enum):
"InstPrefetch",
"VectorUnitStrideLoad",
"VectorUnitStrideStore",
"VectorUnitStrideSegmentedStore",
"VectorUnitStrideMaskLoad",
"VectorUnitStrideMaskStore",
"VectorStridedLoad",

View File

@@ -135,6 +135,8 @@ static const OpClass VectorMiscOp = enums::VectorMisc;
static const OpClass VectorIntegerExtensionOp = enums::VectorIntegerExtension;
static const OpClass VectorUnitStrideSegmentedLoadOp = enums::VectorUnitStrideSegmentedLoad;
static const OpClass VectorConfigOp = enums::VectorConfig;
static const OpClass VectorUnitStrideSegmentedStoreOp
= enums::VectorUnitStrideSegmentedStore;
static const OpClass Num_OpClasses = enums::Num_OpClass;
} // namespace gem5