arch-riscv: add agnostic opt to vector tail/mask for mem insts

Change-Id: I567a110806b77d5576810706bd3e30185b0e0b75
This commit is contained in:
Saúl Adserias
2024-05-14 14:44:30 +02:00
committed by Ivana Mitrovic
parent 09781fd78f
commit 73c364519a
6 changed files with 236 additions and 124 deletions

View File

@@ -140,8 +140,11 @@ class VecRegContainer
VecRegContainer() {}
VecRegContainer(const VecRegContainer &) = default;
/** Set the container. */
void set(uint8_t val) { memset(container.data(), val, SIZE); }
/** Zero the container. */
void zero() { memset(container.data(), 0, SIZE); }
void zero() { set(0); }
/** Assignment operators. */
/** @{ */

View File

@@ -444,14 +444,14 @@ VMaskMergeMicroInst::execute(ExecContext* xc,
uint32_t vlenb = pc_ptr->as<PCState>().vlenb();
const uint32_t elems_per_vreg = vlenb / elemSize;
size_t bit_cnt = elems_per_vreg;
// mask tails are always treated as agnostic: writing 1s
tmp_d0.set(0xff);
vreg_t tmp_s;
xc->getRegOperand(this, 0, &tmp_s);
auto s = tmp_s.as<uint8_t>();
// cp the first result and tail
memcpy(Vd, s, vlenb);
for (uint8_t i = 1; i < this->_numSrcRegs; i++) {
for (uint8_t i = 0; i < this->_numSrcRegs; i++) {
xc->getRegOperand(this, i, &tmp_s);
s = tmp_s.as<uint8_t>();
auto s = tmp_s.as<uint8_t>();
if (elems_per_vreg < 8) {
const uint32_t m = (1 << elems_per_vreg) - 1;
const uint32_t mask = m << (i * elems_per_vreg % 8);
@@ -658,7 +658,13 @@ VlSegDeIntrlvMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) c
for (uint32_t i = 0; i < numSrcs; i++) {
xc->getRegOperand(this, i, &tmp_s);
s = tmp_s.as<uint8_t>();
while(index < (i + 1) * elems_per_vreg)
// copy tail/inactive elements from vtmp src
if (i == field) {
tmp_d0 = tmp_s;
}
while (index < (i + 1) * elems_per_vreg)
{
memcpy(Vd + (elem * sizeOfElement),
s + ((index % elems_per_vreg) * sizeOfElement),
@@ -705,8 +711,6 @@ std::string VsSegMicroInst::generateDisassembly(Addr pc,
ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " <<
'(' << registerName(srcRegIdx(0)) << ')' <<
", "<< registerName(srcRegIdx(1));
if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0)
ss << ", " << registerName(srcRegIdx(2));
if (!machInst.vm)
ss << ", v0.t";
return ss.str();
@@ -797,5 +801,134 @@ VsSegIntrlvMicroInst::generateDisassembly(Addr pc,
return ss.str();
}
/**
 * Construct the micro-op that copies a vector source register into a
 * vector-memory internal register.
 *
 * @param _machInst Decoded machine instruction this micro-op belongs to.
 * @param _microIdx Index of this micro-op; used as the offset for both the
 *                  internal destination register and the source register.
 * @param _vsRegIdx Base index of the vector source register.
 */
VCpyVsMicroInst::VCpyVsMicroInst(ExtMachInst _machInst, uint32_t _microIdx,
                                 uint8_t _vsRegIdx)
    : VectorArithMicroInst("vcpyvs_v_micro", _machInst, SimdMiscOp, 0,
                           _microIdx)
{
    // Hand the per-class operand index storage to the base class.
    setRegIdxArrays(
        reinterpret_cast<RegIdArrayPtr>(&VCpyVsMicroInst::srcRegIdxArr),
        reinterpret_cast<RegIdArrayPtr>(&VCpyVsMicroInst::destRegIdxArr));

    _numSrcRegs = 0;
    _numDestRegs = 0;

    // Single destination: internal register VecMemInternalReg0 + _microIdx.
    setDestRegIdx(_numDestRegs++,
                  vecRegClass[VecMemInternalReg0 + _microIdx]);
    _numTypedDestRegs[VecRegClass]++;

    // Single source: vector register _vsRegIdx + _microIdx.
    setSrcRegIdx(_numSrcRegs++, vecRegClass[_vsRegIdx + _microIdx]);
}
/**
 * Copy source operand 0 into destination operand 0.
 *
 * @param xc        Execution context used to read/write registers.
 * @param traceData Optional trace record; receives the destination value.
 * @return IllegalInstFault when misa.rvv is clear or status.vs is OFF,
 *         NoFault otherwise.
 */
Fault
VCpyVsMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const
{
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);

    const bool vector_unavailable =
        !misa.rvv || status.vs == VPUStatus::OFF;
    if (vector_unavailable) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    // The vector state is about to be modified: flag it as dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // Copy the vector source register into the destination (vtmp).
    auto *vtmp = static_cast<vreg_t *>(xc->getWritableRegOperand(this, 0));
    vreg_t src_value;
    xc->getRegOperand(this, 0, &src_value);
    *vtmp = src_value;

    if (traceData != nullptr) {
        traceData->setData(vecRegClass, vtmp);
    }
    return NoFault;
}
/**
 * Render the micro-op as "<mnemonic> <dest reg>, <src reg>".
 *
 * @param pc     Unused.
 * @param symtab Unused.
 * @return The disassembly text.
 */
std::string
VCpyVsMicroInst::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    std::stringstream text;
    text << mnemonic << ' ';
    text << registerName(destRegIdx(0));
    text << ", ";
    text << registerName(srcRegIdx(0));
    return text.str();
}
/**
 * Construct the micro-op that pins the destination vector register and
 * initialises it according to the tail/mask policy.
 *
 * @param _machInst    Decoded machine instruction this micro-op belongs to.
 * @param _microIdx    Index of this micro-op; offset added to vd.
 * @param _numVdPins   Number of pinned writes recorded on the destination
 *                     register id.
 * @param _hasVdOffset When true, the old vd value is always read as a source.
 */
VPinVdMicroInst::VPinVdMicroInst(ExtMachInst _machInst, uint32_t _microIdx,
                                 uint32_t _numVdPins, bool _hasVdOffset)
    : VectorArithMicroInst("vpinvd_v_micro", _machInst, SimdMiscOp, 0,
                           _microIdx)
    , hasVdOffset(_hasVdOffset)
{
    // Hand the per-class operand index storage to the base class.
    setRegIdxArrays(
        reinterpret_cast<RegIdArrayPtr>(&VPinVdMicroInst::srcRegIdxArr),
        reinterpret_cast<RegIdArrayPtr>(&VPinVdMicroInst::destRegIdxArr));

    _numSrcRegs = 0;
    _numDestRegs = 0;

    setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
    _numTypedDestRegs[VecRegClass]++;

    // The old vd is only a source when its contents have to be preserved:
    // tail-undisturbed, masked with mask-undisturbed, or a vd offset is used.
    const bool reads_old_vd = !_machInst.vtype8.vta
        || (!_machInst.vm && !_machInst.vtype8.vma)
        || hasVdOffset;
    if (reads_old_vd) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]);
    }

    // Re-register the destination with the pinned-write count attached.
    RegId pinned_vd = destRegIdx(0);
    pinned_vd.setNumPinnedWrites(_numVdPins);
    setDestRegIdx(0, pinned_vd);
}
/**
 * Initialise the destination register: either keep its previous contents
 * or fill every byte with 0xff.
 *
 * @param xc        Execution context used to read/write registers.
 * @param traceData Optional trace record; receives the destination value.
 * @return IllegalInstFault when misa.rvv is clear or status.vs is OFF,
 *         NoFault otherwise.
 */
Fault
VPinVdMicroInst::execute(ExecContext* xc, trace::InstRecord* traceData) const
{
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);

    const bool vector_unavailable =
        !misa.rvv || status.vs == VPUStatus::OFF;
    if (vector_unavailable) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    // The vector state is about to be modified: flag it as dirty.
    status.vs = VPUStatus::DIRTY;
    xc->setMiscReg(MISCREG_STATUS, status);

    // tail/mask policy: both undisturbed if one is, 1s if none
    auto *dest = static_cast<vreg_t *>(xc->getWritableRegOperand(this, 0));
    const bool keep_old_vd = !machInst.vtype8.vta
        || (!machInst.vm && !machInst.vtype8.vma)
        || hasVdOffset;
    if (!keep_old_vd) {
        // Fully agnostic: fill the register with all-ones bytes.
        dest->set(0xff);
    } else {
        // Source operand 0 holds the previous vd contents.
        vreg_t previous;
        xc->getRegOperand(this, 0, &previous);
        *dest = previous;
    }

    if (traceData != nullptr) {
        traceData->setData(vecRegClass, dest);
    }
    return NoFault;
}
/**
 * Render the micro-op as "<mnemonic> <dest reg>, <src>", where <src> is
 * the source register name when the old vd is read and "~0" otherwise.
 *
 * @param pc     Unused.
 * @param symtab Unused.
 * @return The disassembly text.
 */
std::string
VPinVdMicroInst::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    const bool reads_old_vd = !machInst.vtype8.vta
        || (!machInst.vm && !machInst.vtype8.vma)
        || hasVdOffset;

    std::stringstream text;
    text << mnemonic << ' ' << registerName(destRegIdx(0)) << ", ";
    // The source register is only queried when it was actually registered.
    text << (reads_old_vd ? registerName(srcRegIdx(0)) : std::string("~0"));
    return text.str();
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -714,6 +714,35 @@ class VsSegIntrlvMicroInst : public VectorArithMicroInst
const loader::SymbolTable *) const override;
};
/**
 * Vector micro-op with exactly one source and one destination register
 * operand.
 */
class VCpyVsMicroInst : public VectorArithMicroInst
{
private:
// Operand index storage: one source, one destination.
RegId srcRegIdxArr[1];
RegId destRegIdxArr[1];
public:
/**
 * @param _machInst Decoded machine instruction.
 * @param _microIdx Index of this micro-op.
 * @param _vsRegIdx Index of the vector source register.
 */
VCpyVsMicroInst(ExtMachInst _machInst, uint32_t _microIdx,
uint8_t _vsRegIdx);
/** Execute the micro-op; returns a Fault (see the definition). */
Fault execute(ExecContext *, trace::InstRecord *) const override;
/** Produce the textual disassembly of this micro-op. */
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
/**
 * Vector micro-op with one destination register operand and at most one
 * source register operand.
 */
class VPinVdMicroInst : public VectorArithMicroInst
{
private:
// Operand index storage: up to one source, one destination.
RegId srcRegIdxArr[1];
RegId destRegIdxArr[1];
// Flag supplied at construction time (defaults to false).
bool hasVdOffset;
public:
/**
 * @param _machInst    Decoded machine instruction.
 * @param _microIdx    Index of this micro-op.
 * @param _numVdPins   Pin count for the destination register.
 * @param _hasVdOffset Optional flag, false when omitted.
 */
VPinVdMicroInst(ExtMachInst _machInst, uint32_t _microIdx,
uint32_t _numVdPins, bool _hasVdOffset=false);
/** Execute the micro-op; returns a Fault (see the definition). */
Fault execute(ExecContext *, trace::InstRecord *) const override;
/** Produce the textual disassembly of this micro-op. */
std::string generateDisassembly(
Addr pc, const loader::SymbolTable *symtab) const override;
};
} // namespace RiscvISA
} // namespace gem5

View File

@@ -607,8 +607,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideLoadOp);
format VlSegOp {
@@ -616,56 +614,42 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x02: vlseg3e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x03: vlseg4e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x04: vlseg5e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x05: vlseg6e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x06: vlseg7e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x07: vlseg8e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
}
@@ -693,8 +677,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=SimdUnitStrideFaultOnlyFirstLoadOp);
}
@@ -719,8 +701,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideLoadOp);
format VlSegOp {
@@ -728,56 +708,42 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x02: vlseg3e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x03: vlseg4e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x04: vlseg5e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x05: vlseg6e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x06: vlseg7e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x07: vlseg8e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
}
@@ -802,8 +768,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=SimdUnitStrideFaultOnlyFirstLoadOp);
}
@@ -828,8 +792,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideLoadOp);
format VlSegOp {
@@ -837,56 +799,42 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x02: vlseg3e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x03: vlseg4e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x04: vlseg5e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x05: vlseg6e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x06: vlseg7e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x07: vlseg8e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
}
@@ -911,8 +859,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=SimdUnitStrideFaultOnlyFirstLoadOp);
}
@@ -937,8 +883,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideLoadOp);
format VlSegOp {
@@ -946,56 +890,42 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x02: vlseg3e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x03: vlseg4e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x04: vlseg5e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x05: vlseg6e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x06: vlseg7e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
0x07: vlseg8e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideSegmentedLoadOp);
}
@@ -1020,8 +950,6 @@ decode QUADRANT default Unknown::unknown() {
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=SimdUnitStrideFaultOnlyFirstLoadOp);
}

View File

@@ -32,7 +32,7 @@ let {{
def setVlen():
return "uint32_t vlen = VlenbBits * 8;\n"
def setVlenb():
return "uint32_t vlenb = VlenbBits;\n"
return "[[maybe_unused]] uint32_t vlenb = VlenbBits;\n"
def declareVMemTemplate(class_name):
return f'''
@@ -55,6 +55,17 @@ def getFaultCode():
}
'''
# Return the C++ snippet that initialises `tmp_d0` according to the vector
# tail/mask policy bits of the instruction:
#   - if vta is clear, or the instruction is masked (vm clear) with vma clear,
#     the snippet reads source operand 1 and copies it into tmp_d0;
#   - otherwise it fills tmp_d0 with 0xff bytes.
# NOTE(review): `tmp_d0` and the meaning of operand index 1 come from the
# template that substitutes this snippet -- confirm that operand 1 is the old
# destination register wherever the snippet is used.
def getTailMaskPolicyCode():
return '''
if (!machInst.vtype8.vta || (!machInst.vm && !machInst.vtype8.vma)) {
RiscvISA::vreg_t old_vd;
xc->getRegOperand(this, 1, &old_vd);
tmp_d0 = old_vd;
} else {
tmp_d0.set(0xff);
}
'''
def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
inst_flags, base_class, postacc_code='',
declare_template_base=VMemMacroDeclare,
@@ -94,7 +105,8 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
'set_vlenb': setVlenb(),
'set_vlen': setVlen(),
'declare_vmem_template': declareVMemTemplate(Name + 'Micro'),
'fault_code': getFaultCode() if fault_only_first else ''},
'fault_code': getFaultCode() if fault_only_first else '',
'tail_mask_policy_code': getTailMaskPolicyCode()},
inst_flags)
if mem_flags:

View File

@@ -128,7 +128,9 @@ def template VleMicroConstructor {{
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]);
if (!_machInst.vtype8.vta || (!_machInst.vm && !_machInst.vtype8.vma)) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]);
}
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
@@ -245,6 +247,9 @@ Fault
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
// tail/mask policy: both undisturbed if one is, 1s if none
%(tail_mask_policy_code)s
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
if(!machInst.vm) {
@@ -815,6 +820,17 @@ def template VlStrideConstructor {{
microop = new VectorNopMicroInst(_machInst);
this->microops.push_back(microop);
}
const uint8_t num_pinvd_microops = ceil((float) this->vl /
num_elems_per_vreg);
for (uint32_t i = 0; i < num_pinvd_microops; i++) {
uint32_t vdNumElems = (vl >= num_elems_per_vreg*(i+1))
? num_elems_per_vreg : vl-num_elems_per_vreg*i;
microop = new VPinVdMicroInst(machInst, i, vdNumElems);
microop->setFlag(IsDelayedCommit);
this->microops.push_back(microop);
}
for (int i = 0; micro_vl > 0; ++i) {
for (int j = 0; j < micro_vl; ++j) {
microop = new %(class_name)sMicro(machInst, i, j, micro_vl);
@@ -838,7 +854,7 @@ def template VlStrideMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
private:
// rs1, rs2, vd, vm
// rs1, rs2, vtmp0, vm
RegId srcRegIdxArr[4];
RegId destRegIdxArr[1];
public:
@@ -869,8 +885,8 @@ def template VlStrideMicroConstructor {{
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]);
// We treat agnostic as undisturbed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]);
// vtmp0 as dummy src reg to create dependency with pin vd micro
setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
@@ -983,14 +999,6 @@ Fault
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
constexpr uint8_t elem_size = sizeof(Vd[0]);
RiscvISA::vreg_t old_vd;
decltype(Vd) old_Vd = nullptr;
// We treat agnostic as undisturbed
xc->getRegOperand(this, 2, &old_vd);
old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >();
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
if (!machInst.vm) {
@@ -998,17 +1006,6 @@ Fault
v0 = tmp_v0.as<uint8_t>();
}
if (microIdx == 0) {
// treat vma as vmu
// if (machInst.vtype8.vma == 0)
memcpy(Vd, old_Vd, microVl * elem_size);
// treat vta as vtu
// if (machInst.vtype8.vta == 0)
memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size);
} else {
memcpy(Vd, old_Vd, vlenb);
}
size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx;
if (machInst.vm || elem_mask(v0, ei)) {
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
@@ -1220,6 +1217,21 @@ template<typename ElemType>
microop = new VectorNopMicroInst(_machInst);
this->microops.push_back(microop);
}
const uint32_t vd_vlmax = vlenb / vd_eewb;
const uint8_t num_pinvdcpyvs_microops = ceil((float) this->vl/vd_vlmax);
for (uint32_t i = 0; i < num_pinvdcpyvs_microops; i++) {
uint32_t vdNumElems = (vl >= vd_vlmax*(i+1)) ? vd_vlmax:vl-vd_vlmax*i;
microop = new VCpyVsMicroInst(machInst, i, machInst.vs2);
microop->setFlag(IsDelayedCommit);
this->microops.push_back(microop);
microop = new VPinVdMicroInst(machInst, i, vdNumElems);
microop->setFlag(IsDelayedCommit);
this->microops.push_back(microop);
}
for (uint32_t i = 0; micro_vl > 0; i++) {
for (uint32_t j = 0; j < micro_vl; ++j) {
uint32_t vdRegIdx = i / vd_split_num;
@@ -1251,8 +1263,8 @@ template<typename ElemType>
class %(class_name)s : public %(base_class)s
{
private:
// rs1, vs2, vd, vm
RegId srcRegIdxArr[4];
// rs1, vs2, vm
RegId srcRegIdxArr[3];
RegId destRegIdxArr[1];
public:
%(class_name)s(ExtMachInst _machInst,
@@ -1283,9 +1295,7 @@ template<typename ElemType>
setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]);
// We treat agnostic as undisturbed
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _vs2RegIdx]);
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
@@ -1408,12 +1418,6 @@ Fault
constexpr uint8_t elem_size = sizeof(Vd[0]);
RiscvISA::vreg_t old_vd;;
decltype(Vd) old_Vd = nullptr;
// We treat agnostic as undisturbed
xc->getRegOperand(this, 2, &old_vd);
old_Vd = old_vd.as<std::remove_reference_t<decltype(Vd[0])> >();
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
if (!machInst.vm) {
@@ -1421,8 +1425,6 @@ Fault
v0 = tmp_v0.as<uint8_t>();
}
memcpy(Vd, old_Vd, vlenb);
size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx;
if (machInst.vm || elem_mask(v0, ei)) {
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
@@ -1690,8 +1692,8 @@ def template VlSegMicroDeclare {{
class %(class_name)s : public %(base_class)s
{
private:
// rs1, rs2, vd, vm
RegId srcRegIdxArr[4];
// rs1, vd, vm
RegId srcRegIdxArr[3];
RegId destRegIdxArr[1];
uint32_t field;
uint32_t numFields;
@@ -1724,8 +1726,10 @@ def template VlSegMicroConstructor {{
(field * numMicroops)]);
_numTypedDestRegs[VecRegClass]++;
setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]);
setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + _microIdx +
(field * numMicroops)]);
if (!_machInst.vtype8.vta || (!_machInst.vm && !_machInst.vtype8.vma)) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx
+ (field * numMicroops)]);
}
if (!_machInst.vm) {
setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);
}
@@ -1843,6 +1847,9 @@ Fault
status.vs = VPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
// tail/mask policy: both undisturbed if one is, 1s if none
%(tail_mask_policy_code)s
RiscvISA::vreg_t tmp_v0;
uint8_t *v0;
if(!machInst.vm) {