arch-riscv: adding vector unit-stride segment loads to RISC-V (#851)

This commit adds support for vector unit-stride segment load operations
for RISC-V (vlseg<NF>e<X>). This implementation is based on two types of
microops:
- VlSeg microops that load data as it is organized in memory in structs
of several fields.
- VectorDeIntrlv microops that properly deinterleave structs into
destination registers.

Gem5 issue: https://github.com/gem5/gem5/issues/382
This commit is contained in:
Ivan Fernandez
2024-03-06 20:27:06 +01:00
committed by GitHub
parent b930c57d54
commit f6c61836b3
9 changed files with 683 additions and 28 deletions

View File

@@ -588,5 +588,102 @@ VlFFTrimVlMicroOp::generateDisassembly(Addr pc,
return ss.str();
}
std::string VlSegMacroInst::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    // Renders "vlseg<nf>e<eew>.v vd, (rs1), vs2[, v0.t]".
    std::stringstream ss;
    ss << mnemonic << ' ' << registerName(destRegIdx(0))
       << ", (" << registerName(srcRegIdx(0)) << "), "
       << registerName(srcRegIdx(1));
    if (!machInst.vm) {
        ss << ", v0.t";
    }
    return ss.str();
}
std::string VlSegMicroInst::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    ss << mnemonic << ' ' << registerName(destRegIdx(0))
       << ", (" << registerName(srcRegIdx(0)) << "), "
       << registerName(srcRegIdx(1));
    // The old-destination operand is printed only when it exists: it is
    // present for non-first microops or when tail/mask policy is
    // "undisturbed" (vta == 0 / vma == 0), matching the constructor.
    const bool hasOldDst = microIdx != 0 || machInst.vtype8.vma == 0 ||
        machInst.vtype8.vta == 0;
    if (hasOldDst) {
        ss << ", " << registerName(srcRegIdx(2));
    }
    if (!machInst.vm) {
        ss << ", v0.t";
    }
    return ss.str();
}
// Deinterleave microop: gathers one field of a segment load out of the
// internal temporary registers written by the VlSeg load microops and
// writes it to an architectural vector register.
VlSegDeIntrlvMicroInst::VlSegDeIntrlvMicroInst(ExtMachInst extMachInst, uint32_t _micro_vl,
uint32_t _dstReg, uint32_t _numSrcs,
uint32_t _microIdx, uint32_t _numMicroops,
uint32_t _field, uint32_t _vlen, uint32_t _sizeOfElement)
: VectorArithMicroInst("vlseg_deintrlv_micro", extMachInst,
VectorIntegerArithOp, 0, 0),
vlen(_vlen)
{
// Hook this instance's register-index arrays into the base class.
setRegIdxArrays(
reinterpret_cast<RegIdArrayPtr>(
&std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
reinterpret_cast<RegIdArrayPtr>(
&std::remove_pointer_t<decltype(this)>::destRegIdxArr));
_numSrcRegs = 0;
_numDestRegs = 0;
numSrcs = _numSrcs;
numMicroops = _numMicroops;
field =_field;
sizeOfElement = _sizeOfElement;
microIdx = _microIdx;
micro_vl = _micro_vl;
// Single destination: the architectural register that receives this field.
setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]);
_numTypedDestRegs[VecRegClass]++;
// Sources: _numSrcs internal temporaries holding interleaved load data.
// NOTE(review): the index stride here (i + microIdx * _numSrcs) should be
// cross-checked against the scheme the load microops use when writing the
// temporaries (_microIdx + field * numMicroops in VlSegMicroConstructor);
// verify they agree when num_microops > 1.
for (uint32_t i=0; i < _numSrcs; i++) {
uint32_t index = VecMemInternalReg0 + i + (microIdx * _numSrcs);
setSrcRegIdx(_numSrcRegs++, vecRegClass[index]);
}
}
Fault
VlSegDeIntrlvMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const
{
    // Gather this destination's field out of the interleaved temporaries:
    // element j of the field sits at interleaved position field + j * numSrcs.
    vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0);
    auto Vd = tmp_d0.as<uint8_t>();
    const uint32_t elems_per_vreg = micro_vl;
    vreg_t tmp_s;
    uint32_t elem = 0;          // next element slot in the destination
    uint32_t index = field;     // current position in the interleaved stream
    for (uint32_t i = 0; i < numSrcs; i++) {
        xc->getRegOperand(this, i, &tmp_s);
        // Scoped here: the view is only valid for the register just read.
        // (Previously `s` was also initialized before the loop from a
        // then-unread tmp_s; that initialization was dead.)
        auto s = tmp_s.as<uint8_t>();
        // Copy every element of this source register belonging to our
        // field, striding by numSrcs through the interleaved layout.
        while (index < (i + 1) * elems_per_vreg) {
            memcpy(Vd + (elem * sizeOfElement),
                s + ((index % elems_per_vreg) * sizeOfElement),
                sizeOfElement);
            index += numSrcs;
            elem++;
        }
    }
    if (traceData)
        traceData->setData(vecRegClass, &tmp_d0);
    return NoFault;
}
std::string
VlSegDeIntrlvMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab)
    const
{
    // Destination register, then every temporary source, then the field id.
    std::stringstream ss;
    ss << mnemonic << ' ' << registerName(destRegIdx(0));
    for (uint8_t srcIdx = 0; srcIdx < this->_numSrcRegs; srcIdx++)
        ss << ", " << registerName(srcRegIdx(srcIdx));
    ss << ", field: " << field;
    return ss.str();
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -596,6 +596,65 @@ class VlFFTrimVlMicroOp : public VectorMicroInst
const override;
};
// Macro-op for vector unit-stride segment loads (vlseg<nf>e<eew>.v).
// The ISA-generated constructor (VlSegConstructor template) splits it
// into VlSeg load microops followed by VlSegDeIntrlv microops.
class VlSegMacroInst : public VectorMemMacroInst
{
protected:
VlSegMacroInst(const char* mnem, ExtMachInst _machInst,
OpClass __opClass, uint32_t _vlen)
: VectorMemMacroInst(mnem, _machInst, __opClass, _vlen)
{}
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
// Base class for the generated segment-load memory microops; each one
// performs one contiguous memory access of the segment data.
class VlSegMicroInst : public VectorMicroInst
{
protected:
Request::Flags memAccessFlags;
uint8_t regIdx;
// _numMicroops, _field and _numFields are accepted but not stored here;
// the generated subclass stores them itself (see VlSegMicroDeclare /
// VlSegMicroConstructor in the ISA templates).
VlSegMicroInst(const char *mnem, ExtMachInst _machInst,
OpClass __opClass, uint32_t _microVl,
uint32_t _microIdx, uint32_t _numMicroops,
uint32_t _field, uint32_t _numFields,
uint32_t _vlen)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, _vlen)
{
this->flags[IsLoad] = true;
}
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
// Microop that deinterleaves segment-load data: reads the internal
// temporary registers filled by the VlSeg load microops and writes one
// field's elements to an architectural vector register.
class VlSegDeIntrlvMicroInst : public VectorArithMicroInst
{
private:
// Temporaries holding the interleaved data (one source per field read).
RegId srcRegIdxArr[NumVecInternalRegs];
RegId destRegIdxArr[1];
uint32_t numSrcs;       // number of source temporaries (NFIELDS)
uint32_t numMicroops;   // load microops per field
uint32_t field;         // which field this microop extracts
uint32_t sizeOfElement; // element size in bytes (EEW / 8)
uint32_t micro_vl;      // elements handled by this microop
public:
uint32_t vlen;
VlSegDeIntrlvMicroInst(ExtMachInst extMachInst, uint32_t _micro_vl,
uint32_t _dstReg, uint32_t _numSrcs,
uint32_t _microIdx, uint32_t _numMicroops,
uint32_t _field, uint32_t _vlen,
uint32_t _sizeOfElement);
Fault execute(ExecContext *, trace::InstRecord *) const override;
std::string generateDisassembly(Addr,
const loader::SymbolTable *) const override;
};
} // namespace RiscvISA
} // namespace gem5

View File

@@ -599,6 +599,7 @@ decode QUADRANT default Unknown::unknown() {
0x0: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle8_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
@@ -607,6 +608,65 @@ decode QUADRANT default Unknown::unknown() {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e8_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re8_v({{
@@ -651,6 +711,7 @@ decode QUADRANT default Unknown::unknown() {
}
0x5: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle16_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
@@ -659,6 +720,65 @@ decode QUADRANT default Unknown::unknown() {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e16_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re16_v({{
@@ -700,6 +820,7 @@ decode QUADRANT default Unknown::unknown() {
}
0x6: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle32_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
@@ -708,6 +829,65 @@ decode QUADRANT default Unknown::unknown() {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e32_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re32_v({{
@@ -749,6 +929,7 @@ decode QUADRANT default Unknown::unknown() {
}
0x7: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle64_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
@@ -757,6 +938,65 @@ decode QUADRANT default Unknown::unknown() {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e64_v({{
if ((machInst.vm || elem_mask_vlseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re64_v({{

View File

@@ -245,3 +245,16 @@ def format VsIndexOp(
decode_template=VMemSplitTemplateDecodeBlock
)
}};
// Format for unit-stride segment loads: emits the VlSeg macro-op plus its
// load and deinterleave microops (exec_template_base='VlSeg').
def format VlSegOp(
memacc_code,
ea_code={{
// The microops collectively read the segment data as one flat
// contiguous region; chunk (microIdx, field) starts mem_size bytes
// times its chunk number past the base address in Rs1.
EA = Rs1 + mem_size * (microIdx + (field * numMicroops));
}},
mem_flags=[],
inst_flags=[]
) {{
(header_output, decoder_output, decode_block, exec_output) = \
VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags,
'VlSegMacroInst', exec_template_base='VlSeg')
}};

View File

@@ -1636,6 +1636,238 @@ Fault
}};
def template VlSegConstructor {{
// Macro-op constructor: splits vlseg<nf>e<eew>.v into (1) one memory
// microop per (field, vl-chunk) loading the raw interleaved data into
// internal temporaries, then (2) one deinterleave microop per
// destination register.
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen)
{
%(set_reg_idx_arr)s;
%(constructor)s;
// Elements one microop can carry, and microops needed per field.
const int32_t micro_vlmax = vlen / width_EEW(_machInst.width);
const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax));
int32_t remaining_vl = this->vl;
int32_t micro_vl = std::min(remaining_vl, micro_vlmax);
size_t NFIELDS = machInst.nf + 1;
StaticInstPtr microop;
uint32_t size_per_elem = width_EEW(_machInst.width) / 8;
if (micro_vl == 0) {
// vl == 0: nothing to load; emit a nop so the macroop stays well-formed.
microop = new VectorNopMicroInst(_machInst);
this->microops.push_back(microop);
} else {
// Phase 1: memory microops.
for (int f = 0; f < NFIELDS; ++f) {
remaining_vl = this->vl;
micro_vl = std::min(remaining_vl, micro_vlmax);
for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
microop = new %(class_name)sMicro(_machInst, micro_vl, i, num_microops, f, NFIELDS, vlen);
microop->setDelayedCommit();
microop->setFlag(IsLoad);
this->microops.push_back(microop);
micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
}
}
// Phase 2: deinterleave microops writing each field's elements into
// its architectural destination register(s).
for (int f = 0; f < NFIELDS; ++f) {
remaining_vl = this->vl;
micro_vl = std::min(remaining_vl, micro_vlmax);
for (int i = 0; i < num_microops && micro_vl > 0; ++i) {
microop = new VlSegDeIntrlvMicroInst(_machInst, micro_vl, _machInst.vd + i + (f * num_microops),
NFIELDS, i, num_microops, f, vlen, size_per_elem);
this->microops.push_back(microop);
micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
}
}
}
this->microops.front()->setFlag(IsFirstMicroop);
this->microops.back()->setFlag(IsLastMicroop);
this->flags[IsVector] = true;
}
}};
def template VlSegMicroDeclare {{
// Declaration of a generated VlSeg memory microop (one per field/chunk).
class %(class_name)s : public %(base_class)s
{
private:
// rs1, rs2, vd, vm
RegId srcRegIdxArr[4];
RegId destRegIdxArr[1];
uint32_t field;       // field this microop loads
uint32_t numFields;   // NFIELDS of the parent macroop
uint32_t numMicroops; // load microops per field
public:
%(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field, uint32_t _numFields, uint32_t _vlen);
Fault execute(ExecContext *, trace::InstRecord *) const override;
Fault initiateAcc(ExecContext *, trace::InstRecord *) const override;
Fault completeAcc(PacketPtr, ExecContext *,
trace::InstRecord *) const override;
using %(base_class)s::generateDisassembly;
};
}};
def template VlSegMicroConstructor {{
%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint32_t _microIdx, uint32_t _numMicroops, uint32_t _field, uint32_t _numFields, uint32_t _vlen)
: %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx , _numMicroops, _field, _numFields, _vlen)
{
%(set_reg_idx_arr)s;
_numSrcRegs = 0;
_numDestRegs = 0;
field = _field;
numFields = _numFields;
numMicroops = _numMicroops;
// Destination: internal temporary register for this (chunk, field).
// NOTE(review): index scheme is _microIdx + field * numMicroops; the
// deinterleave microop reads temporaries at i + microIdx * numSrcs —
// confirm these agree when numMicroops > 1.
setDestRegIdx(_numDestRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
(field * numMicroops)]);
_numTypedDestRegs[VecRegClass]++;
// Sources: base address (rs1) and the old value of the same temporary
// (needed for merging), plus v0 when the instruction is masked.
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
(field * numMicroops)]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
}
}};
def template VlSegMicroExecute {{
// Atomic/functional-mode execute: read the chunk from memory, then run
// memacc_code element-by-element to place data into the temporary register.
Fault
%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
Addr EA;
// Bytes accessed by this microop: (EEW / 8) * elements in this chunk.
uint32_t mem_size = width_EEW(machInst.width) / 8 * microVl;
%(op_decl)s;
%(op_rd)s;
%(set_vlen)s;
%(ea_code)s;
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
MISA misa = xc->readMiscReg(MISCREG_ISA);
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (!misa.rvv || status.vs == VPUStatus::OFF) {
return std::make_shared<IllegalInstFault>(
"RVV is disabled or VPU is off", machInst);
}
if (machInst.vill)
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
const int64_t vlmul = vtype_vlmul(machInst.vtype8);
// EMUL * NFIELDS must not exceed 8 vector registers (RVV constraint).
panic_if((pow(2, vlmul) * this->numFields) > 8, "LMUL value is illegal for vlseg inst");
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
// v0 is only read when the instruction is masked; memacc_code consults it.
if(!machInst.vm) {
xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
v0 = tmp_v0.as<uint8_t>();
}
const std::vector<bool> byte_enable(mem_size, true);
Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
byte_enable);
if (fault != NoFault)
return fault;
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t micro_elems = vlen / width_EEW(machInst.width);
size_t ei;
// ei is the element index within the whole vector (for mask lookup).
for (size_t i = 0; i < micro_elems; i++) {
ei = i + micro_vlmax * microIdx;
%(memacc_code)s;
}
%(op_wb)s;
return fault;
}
}};
def template VlSegMicroInitiateAcc {{
// Timing-mode: validate state and issue the memory read; the loaded data
// is consumed later in completeAcc.
Fault
%(class_name)s::initiateAcc(ExecContext* xc,
trace::InstRecord* traceData) const
{
Addr EA;
uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl;
%(op_decl)s;
%(op_rd)s;
%(ea_code)s;
MISA misa = xc->readMiscReg(MISCREG_ISA);
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (!misa.rvv || status.vs == VPUStatus::OFF) {
return std::make_shared<IllegalInstFault>(
"RVV is disabled or VPU is off", machInst);
}
if (machInst.vill)
return std::make_shared<IllegalInstFault>("VILL is set", machInst);
const int64_t vlmul = vtype_vlmul(machInst.vtype8);
// Same legality check as execute(): EMUL * NFIELDS must not exceed 8.
panic_if((pow(2, vlmul) * this->numFields) > 8, "LMUL value is illegal for vlseg inst");
const std::vector<bool> byte_enable(mem_size, true);
Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
byte_enable);
return fault;
}
}};
def template VlSegMicroCompleteAcc {{
// Timing-mode completion: copy the returned packet data into Mem, then run
// memacc_code element-by-element exactly as the atomic execute() does.
Fault
%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc,
trace::InstRecord *traceData) const
{
%(op_decl)s;
%(op_rd)s;
%(set_vlen)s;
STATUS status = xc->readMiscReg(MISCREG_STATUS);
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
// v0 is only read when the instruction is masked; memacc_code consults it.
if(!machInst.vm) {
xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0);
v0 = tmp_v0.as<uint8_t>();
}
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t micro_elems = vlen / width_EEW(machInst.width);
size_t ei;
// ei is the element index within the whole vector (for mask lookup).
for (size_t i = 0; i < micro_elems; i++) {
ei = i + micro_vlmax * microIdx;
%(memacc_code)s;
}
%(op_wb)s;
return NoFault;
}
}};
def template VMemBaseDecodeBlock {{
// Decode directly to the macro-op; vlen comes from the decoder instance.
return new %(class_name)s(machInst, vlen);
}};

View File

@@ -306,6 +306,17 @@ elem_mask(const T* vs, const int index)
return (vs[idx] >> pos) & 1;
}
/// Return the mask bit (0 or 1) governing a segment-load element.
/// All num_fields fields of one segment share that segment's mask bit, so
/// the flat element index is first mapped back to its segment index.
/// @param vs         pointer to the mask register viewed as integral words
/// @param elem       flat element index (field-interleaved), non-negative
/// @param num_fields fields per segment (NFIELDS), > 0
template<typename T>
inline int
elem_mask_vlseg(const T* vs, const int elem, const int num_fields)
{
    static_assert(std::is_integral_v<T>);
    // Integer division already truncates toward zero for these non-negative
    // operands; the previous floor() merely round-tripped through double.
    const int index = elem / num_fields;
    const int idx = index / (sizeof(T) * 8);
    const int pos = index % (sizeof(T) * 8);
    return (vs[idx] >> pos) & 1;
}
template<typename FloatType, typename IntType = decltype(FloatType::v)> auto
ftype(IntType a) -> FloatType
{

View File

@@ -112,6 +112,7 @@ class OpClass(Enum):
"VectorWholeRegisterLoad",
"VectorWholeRegisterStore",
"VectorIntegerArith",
"VectorUnitStrideSegmentedLoad",
"VectorFloatArith",
"VectorFloatConvert",
"VectorIntegerReduce",

View File

@@ -261,6 +261,7 @@ class MinorDefaultVecFU(MinorFU):
"VectorIndexedLoad",
"VectorIndexedStore",
"VectorUnitStrideFaultOnlyFirstLoad",
"VectorUnitStrideSegmentedLoad",
"VectorWholeRegisterLoad",
"VectorWholeRegisterStore",
"VectorIntegerArith",

View File

@@ -133,6 +133,7 @@ static const OpClass VectorIntegerReduceOp = enums::VectorIntegerReduce;
static const OpClass VectorFloatReduceOp = enums::VectorFloatReduce;
static const OpClass VectorMiscOp = enums::VectorMisc;
static const OpClass VectorIntegerExtensionOp = enums::VectorIntegerExtension;
static const OpClass VectorUnitStrideSegmentedLoadOp = enums::VectorUnitStrideSegmentedLoad;
static const OpClass VectorConfigOp = enums::VectorConfig;
static const OpClass Num_OpClasses = enums::Num_OpClass;