arch-riscv: add unit-stride fault-only-first loads (i.e. vle*ff) (#794)

This patch provides unit-stride fault-only-first loads (i.e. vle*ff) for
the RISC-V architecture.

They are implemented within the regular unit-stride load (i.e. vle*). A
snippet named `fault_code` is inserted with templating to change their
behaviour to fault-only-first.

Apart from this, a new micro based on the vset\*vl\* instructions
(VlFFTrimVlMicroOp) is inserted as the last micro in the macro
constructor to trim the VL to its corresponding length based on the
faulting index.

This trimming micro waits for the load micros to finish (via data
dependency) and has a reference to the other micros to check whether
they faulted or not. The new VL is calculated with the VL of each micro,
stopping on the first faulting one (if there's such a fault).

I've tested this with VLEN=128,256,...,16384 and all the corresponding
SEW+LMUL configurations.


Change-Id: I7b937f6bcb396725461bba4912d2667f3b22f955
This commit is contained in:
Saúl
2024-02-08 18:15:58 +01:00
committed by GitHub
parent 4aecf9d35c
commit 804f137325
7 changed files with 208 additions and 4 deletions

View File

@@ -248,5 +248,23 @@ SyscallFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
tc->getSystemPtr()->workload->syscall(tc);
}
// Extracts the faulting virtual address from an address-carrying fault.
// Writes it to va and returns true for AddressFault and
// GenericPageTableFault; returns false (va untouched) for anything else.
bool
getFaultVAddr(Fault fault, Addr &va)
{
    // RISC-V address faults expose the address through trap_value().
    if (auto *riscv_addr_fault = dynamic_cast<AddressFault *>(fault.get())) {
        va = riscv_addr_fault->trap_value();
        return true;
    }

    // Generic page-table faults carry the address directly.
    if (auto *page_table_fault =
            dynamic_cast<GenericPageTableFault *>(fault.get())) {
        va = page_table_fault->getFaultVAddr();
        return true;
    }

    return false;
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -330,6 +330,18 @@ class SyscallFault : public RiscvFault
void invokeSE(ThreadContext *tc, const StaticInstPtr &inst) override;
};
/**
 * Retrieves the faulting virtual address of a memory-access fault.
 *
 * Returns true if the fault passed as the first argument was triggered
 * by a memory access (an AddressFault or a GenericPageTableFault), false
 * otherwise.
 * When it returns true, the faulting address has been stored in the va
 * argument; when it returns false, va is left unmodified.
 *
 * @param fault generated fault
 * @param va function will modify this passed-by-reference parameter
 * with the correct faulting virtual address
 * @return true if va contains a valid value, false otherwise
 */
bool getFaultVAddr(Fault fault, Addr &va);
} // namespace RiscvISA
} // namespace gem5

View File

@@ -501,5 +501,92 @@ VxsatMicroInst::generateDisassembly(Addr pc,
return ss.str();
}
/**
 * Builds the micro-op that trims VL after a fault-only-first load.
 *
 * @param _machInst  decoded machine instruction of the parent macro-op
 * @param _microVl   vector length forwarded to the VectorMicroInst base
 * @param _microIdx  forwarded to the base as microIdx; used here as the
 *                   number of preceding load micro-ops to depend on
 * @param _vlen      vector register length forwarded to the base
 * @param _microops  micro-op list of the parent macro-op; stored by
 *                   reference, so it must outlive this object
 */
VlFFTrimVlMicroOp::VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl,
        uint32_t _microIdx, uint32_t _vlen,
        std::vector<StaticInstPtr>& _microops)
    : VectorMicroInst("vlff_trimvl_v_micro", _machInst, VectorConfigOp,
                      _microVl, _microIdx, _vlen),
      microops(_microops)
{
    // Only source registers are used; no destination array is registered.
    setRegIdxArrays(
        reinterpret_cast<RegIdArrayPtr>(
            &std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
        nullptr
    );

    // Create data dependency with load micros: read every destination
    // register written by the preceding load micro-ops. The index has the
    // same width as microIdx so the comparison can never wrap around.
    for (uint32_t i = 0; i < microIdx; i++) {
        setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + i]);
    }

    // execute() rewrites the PC state (which holds VL), so the micro-op is
    // flagged as an unconditional, indirect control instruction.
    this->flags[IsControl] = true;
    this->flags[IsIndirectControl] = true;
    this->flags[IsInteger] = true;
    this->flags[IsUncondControl] = true;
}
uint32_t
VlFFTrimVlMicroOp::calcVl() const
{
uint32_t vl = 0;
for (uint8_t i=0; i<microIdx; i++) {
VleMicroInst& micro = static_cast<VleMicroInst&>(*microops[i]);
vl += micro.faultIdx;
if (micro.trimVl)
break;
}
return vl;
}
/**
 * Applies the trimmed VL to the PC state and clears VSTART.
 *
 * @return IllegalInstFault when misa.rvv is clear or status.vs is OFF,
 *         NoFault otherwise
 */
Fault
VlFFTrimVlMicroOp::execute(ExecContext *xc, trace::InstRecord *traceData) const
{
    auto tc = xc->tcBase();

    MISA misa = xc->readMiscReg(MISCREG_ISA);
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    const bool vector_unavailable =
        !misa.rvv || status.vs == VPUStatus::OFF;
    if (vector_unavailable) {
        return std::make_shared<IllegalInstFault>(
            "RVV is disabled or VPU is off", machInst);
    }

    // Length derived from the fault bookkeeping of the load micro-ops.
    const uint32_t trimmed_vl = calcVl();

    PCState next_pc;
    set(next_pc, xc->pcState());

    tc->setMiscReg(MISCREG_VSTART, 0);

    if (traceData) {
        const RegVal traced_val = trimmed_vl;
        traceData->setData(miscRegClass, traced_val);
    }

    // VL is carried in the PC state; publish the updated copy.
    next_pc.vl(trimmed_vl);
    xc->pcState(next_pc);

    return NoFault;
}
/**
 * Returns a copy of the thread's current PC state with VL replaced by the
 * value computed by calcVl().
 */
std::unique_ptr<PCStateBase>
VlFFTrimVlMicroOp::branchTarget(ThreadContext *tc) const
{
    // Take ownership of the clone right away.
    std::unique_ptr<PCStateBase> target{tc->pcState().clone()};
    const uint32_t trimmed_vl = calcVl();
    target->as<PCState>().vl(trimmed_vl);
    return target;
}
/**
 * Produces the disassembly text: the mnemonic followed by " vl".
 * Neither argument is consulted.
 */
std::string
VlFFTrimVlMicroOp::generateDisassembly(Addr pc,
        const loader::SymbolTable *symtab) const
{
    std::string text(mnemonic);
    text += " vl";
    return text;
}
} // namespace RiscvISA
} // namespace gem5

View File

@@ -31,6 +31,7 @@
#include <string>
#include "arch/riscv/faults.hh"
#include "arch/riscv/insts/static_inst.hh"
#include "arch/riscv/isa.hh"
#include "arch/riscv/regs/misc.hh"
@@ -306,6 +307,10 @@ class VseMacroInst : public VectorMemMacroInst
class VleMicroInst : public VectorMicroInst
{
public:
mutable bool trimVl;
mutable uint32_t faultIdx;
protected:
Request::Flags memAccessFlags;
@@ -313,6 +318,7 @@ class VleMicroInst : public VectorMicroInst
uint32_t _microVl, uint32_t _microIdx, uint32_t _vlen)
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
_microIdx, _vlen)
, trimVl(false), faultIdx(_microVl)
{
this->flags[IsLoad] = true;
}
@@ -572,6 +578,24 @@ class VxsatMicroInst : public VectorArithMicroInst
const override;
};
/**
 * Micro-op that trims VL after the load micro-ops of a unit-stride
 * fault-only-first load.
 *
 * It reads the destination registers of the preceding load micro-ops (to
 * be ordered after them) and inspects their fault bookkeeping to compute
 * the new VL. It has no destination registers, so no destination register
 * array is declared.
 */
class VlFFTrimVlMicroOp : public VectorMicroInst
{
  private:
    /** Source register slots, one per load micro-op depended on. */
    RegId srcRegIdxArr[8];
    /**
     * Micro-op list of the parent macro-op. Held by reference: the list
     * must outlive this micro-op.
     */
    std::vector<StaticInstPtr>& microops;

  public:
    VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl,
                      uint32_t _microIdx, uint32_t _vlen,
                      std::vector<StaticInstPtr>& _microops);

    /** Sums the per-micro-op lengths up to the first trimmed one. */
    uint32_t calcVl() const;

    Fault execute(ExecContext *, trace::InstRecord *) const override;
    std::unique_ptr<PCStateBase> branchTarget(ThreadContext *) const override;
    std::string generateDisassembly(Addr, const loader::SymbolTable *)
        const override;
};
} // namespace RiscvISA
} // namespace gem5

View File

@@ -626,6 +626,14 @@ decode QUADRANT default Unknown::unknown() {
0x0b: VlmOp::vlm_v({{
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
}}, inst_flags=VectorUnitStrideMaskLoadOp);
// Unit-stride fault-only-first load, 8-bit elements. Element i is taken
// from memory only when it is unmasked, inside this micro-op's VL and
// below the recorded faulting index; otherwise it is copied from Vs2.
// NOTE(review): presumably Vs2 holds the previous Vd contents - confirm.
0x10: VleOp::vle8ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei8_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
@@ -667,6 +675,14 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
// Unit-stride fault-only-first load, 16-bit elements. Element i is taken
// from memory only when it is unmasked, inside this micro-op's VL and
// below the recorded faulting index; otherwise it is copied from Vs2.
// NOTE(review): presumably Vs2 holds the previous Vd contents - confirm.
0x10: VleOp::vle16ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei16_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
@@ -708,6 +724,14 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
// Unit-stride fault-only-first load, 32-bit elements. Element i is taken
// from memory only when it is unmasked, inside this micro-op's VL and
// below the recorded faulting index; otherwise it is copied from Vs2.
// NOTE(review): presumably Vs2 holds the previous Vd contents - confirm.
0x10: VleOp::vle32ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei32_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
@@ -749,6 +773,14 @@ decode QUADRANT default Unknown::unknown() {
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
// Unit-stride fault-only-first load, 64-bit elements. Element i is taken
// from memory only when it is unmasked, inside this micro-op's VL and
// below the recorded faulting index; otherwise it is copied from Vs2.
// NOTE(review): presumably Vs2 holds the previous Vd contents - confirm.
0x10: VleOp::vle64ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei64_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];

View File

@@ -42,6 +42,19 @@ def declareVMemTemplate(class_name):
template class {class_name}<uint64_t>;
'''
# Returns the C++ snippet substituted for 'fault_code' in the micro-op
# templates of fault-only-first loads (op class containing 'FaultOnlyFirst').
# The snippet runs after the memory access was issued: when the access
# faulted and the fault carries a virtual address, it records in faultIdx
# the element offset of that address from EA (byte distance divided by
# width_EEW(machInst.width) / 8 - presumably the element size in bytes).
# Unless the fault hit element 0 of micro-op 0, the fault is then cleared
# and trimVl is set, so the fault is suppressed rather than raised.
def getFaultCode():
return '''
Addr fault_addr;
if (fault != NoFault && getFaultVAddr(fault, fault_addr)) {
assert(fault_addr >= EA);
faultIdx = (fault_addr - EA) / (width_EEW(machInst.width) / 8);
if (microIdx != 0 || faultIdx != 0) {
fault = NoFault;
trimVl = true;
}
}
'''
def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
inst_flags, base_class, postacc_code='',
declare_template_base=VMemMacroDeclare,
@@ -69,6 +82,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
return (header_output, decoder_output, decode_block, exec_output)
micro_class_name = exec_template_base + 'MicroInst'
fault_only_first = 'FaultOnlyFirst' in iop.op_class
microiop = InstObjParams(name + '_micro',
Name + 'Micro',
exec_template_base + 'MicroInst',
@@ -77,7 +93,8 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
'postacc_code': postacc_code,
'set_vlenb': setVlenb(),
'set_vlen': setVlen(),
'declare_vmem_template': declareVMemTemplate(Name + 'Micro')},
'declare_vmem_template': declareVMemTemplate(Name + 'Micro'),
'fault_code': getFaultCode() if fault_only_first else ''},
inst_flags)
if mem_flags:

View File

@@ -82,6 +82,12 @@ def template VleConstructor {{
micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
}
if (_opClass == VectorUnitStrideFaultOnlyFirstLoadOp) {
microop = new VlFFTrimVlMicroOp(_machInst, this->vl, num_microops,
vlen, microops);
this->microops.push_back(microop);
}
this->microops.front()->setFirstMicroop();
this->microops.back()->setLastMicroop();
}
@@ -168,12 +174,15 @@ Fault
const std::vector<bool> byte_enable(mem_size, true);
Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
byte_enable);
if (fault != NoFault)
return fault;
%(fault_code)s;
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t micro_elems = vlen / width_EEW(machInst.width);
if (fault != NoFault)
return fault;
size_t ei;
for (size_t i = 0; i < micro_elems; i++) {
@@ -215,6 +224,9 @@ Fault
const std::vector<bool> byte_enable(mem_size, true);
Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
byte_enable);
%(fault_code)s;
return fault;
}
@@ -241,7 +253,9 @@ Fault
v0 = tmp_v0.as<uint8_t>();
}
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
if (xc->readMemAccPredicate()) {
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
}
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
const size_t micro_elems = vlen / width_EEW(machInst.width);