arch-riscv: add unit-stride fault-only-first loads (i.e. vle*ff) (#794)
This patch provides unit-stride fault-only-first loads (i.e. vle*ff) for the RISC-V architecture. They are implemented within the regular unit-stride load (i.e. vle*). A snippet named `fault_code` is inserted with templating to change their behaviour to fault-only-first. Apart from this, a new micro based on the vset\*vl\* instructions (VlFFTrimVlMicroOp) is inserted as the last micro in the macro constructor to trim the VL to its corresponding length based on the faulting index. This trimming micro waits for the load micros to finish (via data dependency) and has a reference to the other micros to check whether they faulted or not. The new VL is calculated with the VL of each micro, stopping on the first faulting one (if there's such a fault). I've tested this with VLEN=128,256,...,16384 and all the corresponding SEW+LMUL configurations. Change-Id: I7b937f6bcb396725461bba4912d2667f3b22f955
This commit is contained in:
@@ -248,5 +248,23 @@ SyscallFault::invokeSE(ThreadContext *tc, const StaticInstPtr &inst)
|
|||||||
tc->getSystemPtr()->workload->syscall(tc);
|
tc->getSystemPtr()->workload->syscall(tc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
getFaultVAddr(Fault fault, Addr &va)
|
||||||
|
{
|
||||||
|
auto addr_fault = dynamic_cast<AddressFault *>(fault.get());
|
||||||
|
if (addr_fault) {
|
||||||
|
va = addr_fault->trap_value();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto pgt_fault = dynamic_cast<GenericPageTableFault *>(fault.get());
|
||||||
|
if (pgt_fault) {
|
||||||
|
va = pgt_fault->getFaultVAddr();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace RiscvISA
|
} // namespace RiscvISA
|
||||||
} // namespace gem5
|
} // namespace gem5
|
||||||
|
|||||||
@@ -330,6 +330,18 @@ class SyscallFault : public RiscvFault
|
|||||||
void invokeSE(ThreadContext *tc, const StaticInstPtr &inst) override;
|
void invokeSE(ThreadContext *tc, const StaticInstPtr &inst) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the fault passed as a first argument was triggered
|
||||||
|
* by a memory access, false otherwise.
|
||||||
|
* If true it is storing the faulting address in the va argument
|
||||||
|
*
|
||||||
|
* @param fault generated fault
|
||||||
|
* @param va function will modify this passed-by-reference parameter
|
||||||
|
* with the correct faulting virtual address
|
||||||
|
* @return true if va contains a valid value, false otherwise
|
||||||
|
*/
|
||||||
|
bool getFaultVAddr(Fault fault, Addr &va);
|
||||||
|
|
||||||
} // namespace RiscvISA
|
} // namespace RiscvISA
|
||||||
} // namespace gem5
|
} // namespace gem5
|
||||||
|
|
||||||
|
|||||||
@@ -501,5 +501,92 @@ VxsatMicroInst::generateDisassembly(Addr pc,
|
|||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
VlFFTrimVlMicroOp::VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl,
|
||||||
|
uint32_t _microIdx, uint32_t _vlen, std::vector<StaticInstPtr>& _microops)
|
||||||
|
: VectorMicroInst("vlff_trimvl_v_micro", _machInst, VectorConfigOp,
|
||||||
|
_microVl, _microIdx, _vlen),
|
||||||
|
microops(_microops)
|
||||||
|
{
|
||||||
|
setRegIdxArrays(
|
||||||
|
reinterpret_cast<RegIdArrayPtr>(
|
||||||
|
&std::remove_pointer_t<decltype(this)>::srcRegIdxArr),
|
||||||
|
nullptr
|
||||||
|
);
|
||||||
|
|
||||||
|
// Create data dependency with load micros
|
||||||
|
for (uint8_t i=0; i<microIdx; i++) {
|
||||||
|
setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->flags[IsControl] = true;
|
||||||
|
this->flags[IsIndirectControl] = true;
|
||||||
|
this->flags[IsInteger] = true;
|
||||||
|
this->flags[IsUncondControl] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
VlFFTrimVlMicroOp::calcVl() const
|
||||||
|
{
|
||||||
|
uint32_t vl = 0;
|
||||||
|
for (uint8_t i=0; i<microIdx; i++) {
|
||||||
|
VleMicroInst& micro = static_cast<VleMicroInst&>(*microops[i]);
|
||||||
|
vl += micro.faultIdx;
|
||||||
|
|
||||||
|
if (micro.trimVl)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return vl;
|
||||||
|
}
|
||||||
|
|
||||||
|
Fault
|
||||||
|
VlFFTrimVlMicroOp::execute(ExecContext *xc, trace::InstRecord *traceData) const
|
||||||
|
{
|
||||||
|
auto tc = xc->tcBase();
|
||||||
|
MISA misa = xc->readMiscReg(MISCREG_ISA);
|
||||||
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
||||||
|
if (!misa.rvv || status.vs == VPUStatus::OFF) {
|
||||||
|
return std::make_shared<IllegalInstFault>(
|
||||||
|
"RVV is disabled or VPU is off", machInst);
|
||||||
|
}
|
||||||
|
|
||||||
|
PCState pc;
|
||||||
|
set(pc, xc->pcState());
|
||||||
|
|
||||||
|
uint32_t new_vl = calcVl();
|
||||||
|
|
||||||
|
tc->setMiscReg(MISCREG_VSTART, 0);
|
||||||
|
|
||||||
|
RegVal final_val = new_vl;
|
||||||
|
if (traceData) {
|
||||||
|
traceData->setData(miscRegClass, final_val);
|
||||||
|
}
|
||||||
|
|
||||||
|
pc.vl(new_vl);
|
||||||
|
xc->pcState(pc);
|
||||||
|
|
||||||
|
return NoFault;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<PCStateBase>
|
||||||
|
VlFFTrimVlMicroOp::branchTarget(ThreadContext *tc) const
|
||||||
|
{
|
||||||
|
PCStateBase *pc_ptr = tc->pcState().clone();
|
||||||
|
|
||||||
|
uint32_t new_vl = calcVl();
|
||||||
|
|
||||||
|
pc_ptr->as<PCState>().vl(new_vl);
|
||||||
|
return std::unique_ptr<PCStateBase>{pc_ptr};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string
|
||||||
|
VlFFTrimVlMicroOp::generateDisassembly(Addr pc,
|
||||||
|
const loader::SymbolTable *symtab) const
|
||||||
|
{
|
||||||
|
std::stringstream ss;
|
||||||
|
ss << mnemonic << " vl";
|
||||||
|
return ss.str();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace RiscvISA
|
} // namespace RiscvISA
|
||||||
} // namespace gem5
|
} // namespace gem5
|
||||||
|
|||||||
@@ -31,6 +31,7 @@
|
|||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
#include "arch/riscv/faults.hh"
|
||||||
#include "arch/riscv/insts/static_inst.hh"
|
#include "arch/riscv/insts/static_inst.hh"
|
||||||
#include "arch/riscv/isa.hh"
|
#include "arch/riscv/isa.hh"
|
||||||
#include "arch/riscv/regs/misc.hh"
|
#include "arch/riscv/regs/misc.hh"
|
||||||
@@ -306,6 +307,10 @@ class VseMacroInst : public VectorMemMacroInst
|
|||||||
|
|
||||||
class VleMicroInst : public VectorMicroInst
|
class VleMicroInst : public VectorMicroInst
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
mutable bool trimVl;
|
||||||
|
mutable uint32_t faultIdx;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Request::Flags memAccessFlags;
|
Request::Flags memAccessFlags;
|
||||||
|
|
||||||
@@ -313,6 +318,7 @@ class VleMicroInst : public VectorMicroInst
|
|||||||
uint32_t _microVl, uint32_t _microIdx, uint32_t _vlen)
|
uint32_t _microVl, uint32_t _microIdx, uint32_t _vlen)
|
||||||
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
|
: VectorMicroInst(mnem, _machInst, __opClass, _microVl,
|
||||||
_microIdx, _vlen)
|
_microIdx, _vlen)
|
||||||
|
, trimVl(false), faultIdx(_microVl)
|
||||||
{
|
{
|
||||||
this->flags[IsLoad] = true;
|
this->flags[IsLoad] = true;
|
||||||
}
|
}
|
||||||
@@ -572,6 +578,24 @@ class VxsatMicroInst : public VectorArithMicroInst
|
|||||||
const override;
|
const override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class VlFFTrimVlMicroOp : public VectorMicroInst
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
RegId srcRegIdxArr[8];
|
||||||
|
RegId destRegIdxArr[0];
|
||||||
|
std::vector<StaticInstPtr>& microops;
|
||||||
|
|
||||||
|
public:
|
||||||
|
VlFFTrimVlMicroOp(ExtMachInst _machInst, uint32_t _microVl,
|
||||||
|
uint32_t _microIdx, uint32_t _vlen,
|
||||||
|
std::vector<StaticInstPtr>& _microops);
|
||||||
|
uint32_t calcVl() const;
|
||||||
|
Fault execute(ExecContext *, trace::InstRecord *) const override;
|
||||||
|
std::unique_ptr<PCStateBase> branchTarget(ThreadContext *) const override;
|
||||||
|
std::string generateDisassembly(Addr, const loader::SymbolTable *)
|
||||||
|
const override;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace RiscvISA
|
} // namespace RiscvISA
|
||||||
} // namespace gem5
|
} // namespace gem5
|
||||||
|
|
||||||
|
|||||||
@@ -626,6 +626,14 @@ decode QUADRANT default Unknown::unknown() {
|
|||||||
0x0b: VlmOp::vlm_v({{
|
0x0b: VlmOp::vlm_v({{
|
||||||
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||||
}}, inst_flags=VectorUnitStrideMaskLoadOp);
|
}}, inst_flags=VectorUnitStrideMaskLoadOp);
|
||||||
|
0x10: VleOp::vle8ff_v({{
|
||||||
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||||
|
i < this->microVl && i < this->faultIdx) {
|
||||||
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
||||||
|
} else {
|
||||||
|
Vd_ub[i] = Vs2_ub[i];
|
||||||
|
}
|
||||||
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
||||||
}
|
}
|
||||||
0x1: VlIndexOp::vluxei8_v({{
|
0x1: VlIndexOp::vluxei8_v({{
|
||||||
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
||||||
@@ -667,6 +675,14 @@ decode QUADRANT default Unknown::unknown() {
|
|||||||
}}, inst_flags=VectorWholeRegisterLoadOp);
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
0x10: VleOp::vle16ff_v({{
|
||||||
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||||
|
i < this->microVl && i < this->faultIdx) {
|
||||||
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
||||||
|
} else {
|
||||||
|
Vd_uh[i] = Vs2_uh[i];
|
||||||
|
}
|
||||||
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
||||||
}
|
}
|
||||||
0x1: VlIndexOp::vluxei16_v({{
|
0x1: VlIndexOp::vluxei16_v({{
|
||||||
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
||||||
@@ -708,6 +724,14 @@ decode QUADRANT default Unknown::unknown() {
|
|||||||
}}, inst_flags=VectorWholeRegisterLoadOp);
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
0x10: VleOp::vle32ff_v({{
|
||||||
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||||
|
i < this->microVl && i < this->faultIdx) {
|
||||||
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
||||||
|
} else {
|
||||||
|
Vd_uw[i] = Vs2_uw[i];
|
||||||
|
}
|
||||||
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
||||||
}
|
}
|
||||||
0x1: VlIndexOp::vluxei32_v({{
|
0x1: VlIndexOp::vluxei32_v({{
|
||||||
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
||||||
@@ -749,6 +773,14 @@ decode QUADRANT default Unknown::unknown() {
|
|||||||
}}, inst_flags=VectorWholeRegisterLoadOp);
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
0x10: VleOp::vle64ff_v({{
|
||||||
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
||||||
|
i < this->microVl && i < this->faultIdx) {
|
||||||
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
||||||
|
} else {
|
||||||
|
Vd_ud[i] = Vs2_ud[i];
|
||||||
|
}
|
||||||
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
||||||
}
|
}
|
||||||
0x1: VlIndexOp::vluxei64_v({{
|
0x1: VlIndexOp::vluxei64_v({{
|
||||||
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
||||||
|
|||||||
@@ -42,6 +42,19 @@ def declareVMemTemplate(class_name):
|
|||||||
template class {class_name}<uint64_t>;
|
template class {class_name}<uint64_t>;
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
def getFaultCode():
|
||||||
|
return '''
|
||||||
|
Addr fault_addr;
|
||||||
|
if (fault != NoFault && getFaultVAddr(fault, fault_addr)) {
|
||||||
|
assert(fault_addr >= EA);
|
||||||
|
faultIdx = (fault_addr - EA) / (width_EEW(machInst.width) / 8);
|
||||||
|
if (microIdx != 0 || faultIdx != 0) {
|
||||||
|
fault = NoFault;
|
||||||
|
trimVl = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'''
|
||||||
|
|
||||||
def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
|
def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
|
||||||
inst_flags, base_class, postacc_code='',
|
inst_flags, base_class, postacc_code='',
|
||||||
declare_template_base=VMemMacroDeclare,
|
declare_template_base=VMemMacroDeclare,
|
||||||
@@ -69,6 +82,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
|
|||||||
return (header_output, decoder_output, decode_block, exec_output)
|
return (header_output, decoder_output, decode_block, exec_output)
|
||||||
|
|
||||||
micro_class_name = exec_template_base + 'MicroInst'
|
micro_class_name = exec_template_base + 'MicroInst'
|
||||||
|
|
||||||
|
fault_only_first = 'FaultOnlyFirst' in iop.op_class
|
||||||
|
|
||||||
microiop = InstObjParams(name + '_micro',
|
microiop = InstObjParams(name + '_micro',
|
||||||
Name + 'Micro',
|
Name + 'Micro',
|
||||||
exec_template_base + 'MicroInst',
|
exec_template_base + 'MicroInst',
|
||||||
@@ -77,7 +93,8 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags,
|
|||||||
'postacc_code': postacc_code,
|
'postacc_code': postacc_code,
|
||||||
'set_vlenb': setVlenb(),
|
'set_vlenb': setVlenb(),
|
||||||
'set_vlen': setVlen(),
|
'set_vlen': setVlen(),
|
||||||
'declare_vmem_template': declareVMemTemplate(Name + 'Micro')},
|
'declare_vmem_template': declareVMemTemplate(Name + 'Micro'),
|
||||||
|
'fault_code': getFaultCode() if fault_only_first else ''},
|
||||||
inst_flags)
|
inst_flags)
|
||||||
|
|
||||||
if mem_flags:
|
if mem_flags:
|
||||||
|
|||||||
@@ -82,6 +82,12 @@ def template VleConstructor {{
|
|||||||
micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
|
micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (_opClass == VectorUnitStrideFaultOnlyFirstLoadOp) {
|
||||||
|
microop = new VlFFTrimVlMicroOp(_machInst, this->vl, num_microops,
|
||||||
|
vlen, microops);
|
||||||
|
this->microops.push_back(microop);
|
||||||
|
}
|
||||||
|
|
||||||
this->microops.front()->setFirstMicroop();
|
this->microops.front()->setFirstMicroop();
|
||||||
this->microops.back()->setLastMicroop();
|
this->microops.back()->setLastMicroop();
|
||||||
}
|
}
|
||||||
@@ -168,12 +174,15 @@ Fault
|
|||||||
const std::vector<bool> byte_enable(mem_size, true);
|
const std::vector<bool> byte_enable(mem_size, true);
|
||||||
Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
|
Fault fault = xc->readMem(EA, Mem.as<uint8_t>(), mem_size, memAccessFlags,
|
||||||
byte_enable);
|
byte_enable);
|
||||||
if (fault != NoFault)
|
|
||||||
return fault;
|
%(fault_code)s;
|
||||||
|
|
||||||
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
|
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
|
||||||
const size_t micro_elems = vlen / width_EEW(machInst.width);
|
const size_t micro_elems = vlen / width_EEW(machInst.width);
|
||||||
|
|
||||||
|
if (fault != NoFault)
|
||||||
|
return fault;
|
||||||
|
|
||||||
size_t ei;
|
size_t ei;
|
||||||
|
|
||||||
for (size_t i = 0; i < micro_elems; i++) {
|
for (size_t i = 0; i < micro_elems; i++) {
|
||||||
@@ -215,6 +224,9 @@ Fault
|
|||||||
const std::vector<bool> byte_enable(mem_size, true);
|
const std::vector<bool> byte_enable(mem_size, true);
|
||||||
Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
|
Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags,
|
||||||
byte_enable);
|
byte_enable);
|
||||||
|
|
||||||
|
%(fault_code)s;
|
||||||
|
|
||||||
return fault;
|
return fault;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -241,7 +253,9 @@ Fault
|
|||||||
v0 = tmp_v0.as<uint8_t>();
|
v0 = tmp_v0.as<uint8_t>();
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
|
if (xc->readMemAccPredicate()) {
|
||||||
|
memcpy(Mem.as<uint8_t>(), pkt->getPtr<uint8_t>(), pkt->getSize());
|
||||||
|
}
|
||||||
|
|
||||||
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
|
const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true);
|
||||||
const size_t micro_elems = vlen / width_EEW(machInst.width);
|
const size_t micro_elems = vlen / width_EEW(machInst.width);
|
||||||
|
|||||||
Reference in New Issue
Block a user