From 804f1373252ff1e4a93c48a1724bbb12ef616d3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sa=C3=BAl?= <33020671+saul44203@users.noreply.github.com> Date: Thu, 8 Feb 2024 18:15:58 +0100 Subject: [PATCH] arch-riscv: add unit-stride fault-only-first loads (i.e. vle*ff) (#794) This patch provides unit-stride fault-only-first loads (i.e. vle*ff) for the RISC-V architecture. They are implemented within the regular unit-stride load (i.e. vle*). A snippet named `fault_code` is inserted with templating to change their behaviour to fault-only-first. Apart from this, a new micro based on the vset\*vl\* instructions (VlFFTrimVlMicroOp) is inserted as the last micro in the macro constructor to trim the VL to its corresponding length based on the faulting index. This trimming micro waits for the load micros to finish (via data dependency) and has a reference to the other micros to check whether they faulted or not. The new VL is calculated with the VL of each micro, stopping on the first faulting one (if there's such a fault). I've tested this with VLEN=128,256,...,16384 and all the corresponding SEW+LMUL configurations.
Change-Id: I7b937f6bcb396725461bba4912d2667f3b22f955 --- src/arch/riscv/faults.cc | 18 +++++ src/arch/riscv/faults.hh | 12 +++ src/arch/riscv/insts/vector.cc | 87 +++++++++++++++++++++ src/arch/riscv/insts/vector.hh | 24 ++++++ src/arch/riscv/isa/decoder.isa | 32 ++++++++ src/arch/riscv/isa/formats/vector_mem.isa | 19 ++++- src/arch/riscv/isa/templates/vector_mem.isa | 20 ++++- 7 files changed, 208 insertions(+), 4 deletions(-) diff --git a/src/arch/riscv/faults.cc b/src/arch/riscv/faults.cc index 2e583e3680..634171fc5c 100644 --- a/src/arch/riscv/faults.cc +++ b/src/arch/riscv/faults.cc @@ -248,5 +248,23 @@ SyscallFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst) tc->getSystemPtr()->workload->syscall(tc); } +bool +getFaultVAddr(Fault fault, Addr &va) +{ + auto addr_fault = dynamic_cast<AddressFault *>(fault.get()); + if (addr_fault) { + va = addr_fault->trap_value(); + return true; + } + + auto pgt_fault = dynamic_cast<GenericPageTableFault *>(fault.get()); + if (pgt_fault) { + va = pgt_fault->getFaultVAddr(); + return true; + } + + return false; +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/faults.hh b/src/arch/riscv/faults.hh index 36fec182e7..6f4245093f 100644 --- a/src/arch/riscv/faults.hh +++ b/src/arch/riscv/faults.hh @@ -330,6 +330,18 @@ class SyscallFault : public RiscvFault void invokeSE(ThreadContext *tc, const StaticInstPtr &inst) override; }; +/** + * Returns true if the fault passed as a first argument was triggered + * by a memory access, false otherwise.
 + * If true, the faulting address is stored in the va argument + * + * @param fault generated fault + * @param va function will modify this passed-by-reference parameter + with the correct faulting virtual address + * @return true if va contains a valid value, false otherwise + */ +bool getFaultVAddr(Fault fault, Addr &va); + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 7f17bb055e..b6d052ce4b 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -501,5 +501,92 @@ VxsatMicroInst::generateDisassembly(Addr pc, return ss.str(); } + +VlFFTrimVlMicroOp::VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl, + uint32_t _microIdx, uint32_t _vlen, std::vector<StaticInstPtr>& _microops) + : VectorMicroInst("vlff_trimvl_v_micro", _machInst, VectorConfigOp, + _microVl, _microIdx, _vlen), + microops(_microops) +{ + setRegIdxArrays( + reinterpret_cast<RegIdArrayPtr>( + &std::remove_pointer_t<decltype(this)>::srcRegIdxArr), + nullptr + ); + + // Create data dependency with load micros + for (uint8_t i=0; i<microops.size(); i++) { + /* NOTE(review): loop body was lost to markup stripping in this patch; + reconstructed as a source dependency on each load micro's dest + register -- verify against upstream gem5 commit 804f1373 */ + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + i]); + } + + this->flags[IsControl] = true; + this->flags[IsIndirectControl] = true; + this->flags[IsInteger] = true; + this->flags[IsUncondControl] = true; +} + +uint32_t +VlFFTrimVlMicroOp::calcVl() const +{ + uint32_t vl = 0; + for (uint8_t i=0; i<microops.size(); i++) { + VleMicroInst& micro = static_cast<VleMicroInst&>(*microops[i]); + vl += micro.faultIdx; + + if (micro.trimVl) + break; + } + return vl; +} + +Fault +VlFFTrimVlMicroOp::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + auto tc = xc->tcBase(); + MISA misa = xc->readMiscReg(MISCREG_ISA); + STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { + return std::make_shared<IllegalInstFault>( + "RVV is disabled or VPU is off", machInst); + } + + PCState pc; + set(pc, xc->pcState()); + + uint32_t new_vl = calcVl(); + + tc->setMiscReg(MISCREG_VSTART, 0); + + RegVal final_val = new_vl; + if (traceData) { + traceData->setData(miscRegClass, final_val); + } + + pc.vl(new_vl); + xc->pcState(pc); + 
 + return NoFault; +} + +std::unique_ptr<PCStateBase> +VlFFTrimVlMicroOp::branchTarget(ThreadContext *tc) const +{ + PCStateBase *pc_ptr = tc->pcState().clone(); + + uint32_t new_vl = calcVl(); + + pc_ptr->as<PCState>().vl(new_vl); + return std::unique_ptr<PCStateBase>{pc_ptr}; +} + +std::string +VlFFTrimVlMicroOp::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << " vl"; + return ss.str(); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index fb36f0809d..fd891dad42 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -31,6 +31,7 @@ #include <vector> +#include "arch/riscv/faults.hh" #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/isa.hh" #include "arch/riscv/regs/misc.hh" @@ -306,6 +307,10 @@ class VseMacroInst : public VectorMemMacroInst class VleMicroInst : public VectorMicroInst { + public: + mutable bool trimVl; + mutable uint32_t faultIdx; + protected: Request::Flags memAccessFlags; @@ -313,6 +318,7 @@ uint32_t _microVl, uint32_t _microIdx, uint32_t _vlen) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, _vlen) + , trimVl(false), faultIdx(_microVl) { this->flags[IsLoad] = true; } @@ -572,6 +578,24 @@ class VxsatMicroInst : public VectorArithMicroInst const override; }; +class VlFFTrimVlMicroOp : public VectorMicroInst +{ + private: + RegId srcRegIdxArr[8]; + RegId destRegIdxArr[0]; + std::vector<StaticInstPtr>& microops; + + public: + VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl, + uint32_t _microIdx, uint32_t _vlen, + std::vector<StaticInstPtr>& _microops); + uint32_t calcVl() const; + Fault execute(ExecContext *, trace::InstRecord *) const override; + std::unique_ptr<PCStateBase> branchTarget(ThreadContext *) const override; + std::string generateDisassembly(Addr, const loader::SymbolTable *) + const override; +}; + } // namespace RiscvISA } // namespace gem5 diff --git 
a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 7fd6f0ef57..58468d7400 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -626,6 +626,14 @@ decode QUADRANT default Unknown::unknown() { 0x0b: VlmOp::vlm_v({{ Vd_ub[i] = Mem_vc.as<uint8_t>()[i]; }}, inst_flags=VectorUnitStrideMaskLoadOp); + 0x10: VleOp::vle8ff_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl && i < this->faultIdx) { + Vd_ub[i] = Mem_vc.as<uint8_t>()[i]; + } else { + Vd_ub[i] = Vs2_ub[i]; + } + }}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp); } 0x1: VlIndexOp::vluxei8_v({{ Vd_vu[vdElemIdx] = Mem_vc.as<uint8_t>()[0]; @@ -667,6 +675,14 @@ decode QUADRANT default Unknown::unknown() { }}, inst_flags=VectorWholeRegisterLoadOp); } } + 0x10: VleOp::vle16ff_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl && i < this->faultIdx) { + Vd_uh[i] = Mem_vc.as<uint16_t>()[i]; + } else { + Vd_uh[i] = Vs2_uh[i]; + } + }}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp); } 0x1: VlIndexOp::vluxei16_v({{ Vd_vu[vdElemIdx] = Mem_vc.as<uint16_t>()[0]; @@ -708,6 +724,14 @@ decode QUADRANT default Unknown::unknown() { }}, inst_flags=VectorWholeRegisterLoadOp); } } + 0x10: VleOp::vle32ff_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl && i < this->faultIdx) { + Vd_uw[i] = Mem_vc.as<uint32_t>()[i]; + } else { + Vd_uw[i] = Vs2_uw[i]; + } + }}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp); } 0x1: VlIndexOp::vluxei32_v({{ Vd_vu[vdElemIdx] = Mem_vc.as<uint32_t>()[0]; @@ -749,6 +773,14 @@ decode QUADRANT default Unknown::unknown() { }}, inst_flags=VectorWholeRegisterLoadOp); } } + 0x10: VleOp::vle64ff_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl && i < this->faultIdx) { + Vd_ud[i] = Mem_vc.as<uint64_t>()[i]; + } else { + Vd_ud[i] = Vs2_ud[i]; + } + }}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp); } 0x1: VlIndexOp::vluxei64_v({{ Vd_vu[vdElemIdx] = Mem_vc.as<uint64_t>()[0]; diff --git a/src/arch/riscv/isa/formats/vector_mem.isa 
b/src/arch/riscv/isa/formats/vector_mem.isa index 3b3309797c..77123fb7ef 100644 --- a/src/arch/riscv/isa/formats/vector_mem.isa +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -42,6 +42,19 @@ def declareVMemTemplate(class_name): template class {class_name}; ''' +def getFaultCode(): + return ''' + Addr fault_addr; + if (fault != NoFault && getFaultVAddr(fault, fault_addr)) { + assert(fault_addr >= EA); + faultIdx = (fault_addr - EA) / (width_EEW(machInst.width) / 8); + if (microIdx != 0 || faultIdx != 0) { + fault = NoFault; + trimVl = true; + } + } + ''' + def VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, base_class, postacc_code='', declare_template_base=VMemMacroDeclare, @@ -69,6 +82,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, return (header_output, decoder_output, decode_block, exec_output) micro_class_name = exec_template_base + 'MicroInst' + + fault_only_first = 'FaultOnlyFirst' in iop.op_class + microiop = InstObjParams(name + '_micro', Name + 'Micro', exec_template_base + 'MicroInst', @@ -77,7 +93,8 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, 'postacc_code': postacc_code, 'set_vlenb': setVlenb(), 'set_vlen': setVlen(), - 'declare_vmem_template': declareVMemTemplate(Name + 'Micro')}, + 'declare_vmem_template': declareVMemTemplate(Name + 'Micro'), + 'fault_code': getFaultCode() if fault_only_first else ''}, inst_flags) if mem_flags: diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index 1510c106c7..6f72d0a79d 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -82,6 +82,12 @@ def template VleConstructor {{ micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); } + if (_opClass == VectorUnitStrideFaultOnlyFirstLoadOp) { + microop = new VlFFTrimVlMicroOp(_machInst, this->vl, num_microops, + vlen, microops); + this->microops.push_back(microop); + } + 
 this->microops.front()->setFirstMicroop(); this->microops.back()->setLastMicroop(); } @@ -168,12 +174,15 @@ Fault const std::vector<bool> byte_enable(mem_size, true); Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags, byte_enable); - if (fault != NoFault) - return fault; + + %(fault_code)s; const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t micro_elems = vlen / width_EEW(machInst.width); + if (fault != NoFault) + return fault; + size_t ei; for (size_t i = 0; i < micro_elems; i++) { @@ -215,6 +224,9 @@ Fault const std::vector<bool> byte_enable(mem_size, true); Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + + %(fault_code)s; + return fault; } @@ -241,7 +253,9 @@ Fault v0 = tmp_v0.as<uint8_t>(); } - memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize()); + if (xc->readMemAccPredicate()) { + memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize()); + } const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t micro_elems = vlen / width_EEW(machInst.width);