diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 3965a45b26..f2bde629e9 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -122,5 +122,178 @@ VConfOp::generateZimmDisassembly() const return s.str(); } +std::string VleMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VseMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VleMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + 
'(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VseMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VlStrideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", " << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlStrideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", "<< registerName(srcRegIdx(1)); + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsStrideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", " << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsStrideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " << + '(' << registerName(srcRegIdx(0)) 
<< ')' << + ", "<< registerName(srcRegIdx(1)); + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlIndexMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << '(' << registerName(srcRegIdx(0)) << ")," + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlIndexMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' + << registerName(destRegIdx(0)) << "[" << uint16_t(vdElemIdx) << "], " + << '(' << registerName(srcRegIdx(0)) << "), " + << registerName(srcRegIdx(1)) << "[" << uint16_t(vs2ElemIdx) << "]"; + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsIndexMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " + << '(' << registerName(srcRegIdx(0)) << ")," + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsIndexMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' + << registerName(srcRegIdx(2)) << "[" << uint16_t(vs3ElemIdx) << "], " + << '(' << registerName(srcRegIdx(0)) << "), " + << registerName(srcRegIdx(1)) << "[" << uint16_t(vs2ElemIdx) << "]"; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index cdeb48360c..f989d7ffbf 100644 --- 
a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -80,6 +80,347 @@ class VConfOp : public RiscvStaticInst std::string generateZimmDisassembly() const; }; +inline uint8_t checked_vtype(bool vill, uint8_t vtype) { + panic_if(vill, "vill has been set"); + const uint8_t vsew = bits(vtype, 5, 3); + panic_if(vsew >= 0b100, "vsew: %#x not supported", vsew); + const uint8_t vlmul = bits(vtype, 2, 0); + panic_if(vlmul == 0b100, "vlmul: %#x not supported", vlmul); + return vtype; +} + +class VectorMacroInst : public RiscvMacroInst +{ + protected: + uint32_t vl; + uint8_t vtype; + VectorMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : RiscvMacroInst(mnem, _machInst, __opClass), + vl(_machInst.vl), + vtype(checked_vtype(_machInst.vill, _machInst.vtype8)) + { + this->flags[IsVector] = true; + } +}; + +class VectorMicroInst : public RiscvMicroInst +{ +protected: + uint8_t microVl; + uint8_t microIdx; + uint8_t vtype; + VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : RiscvMicroInst(mnem, _machInst, __opClass), + microVl(_microVl), + microIdx(_microIdx), + vtype(_machInst.vtype8) + { + this->flags[IsVector] = true; + } +}; + +class VectorNopMicroInst : public RiscvMicroInst +{ +public: + VectorNopMicroInst(ExtMachInst _machInst) + : RiscvMicroInst("vnop", _machInst, No_OpClass) + {} + + Fault execute(ExecContext* xc, trace::InstRecord* traceData) + const override + { + return NoFault; + } + + std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { + std::stringstream ss; + ss << mnemonic; + return ss.str(); + } +}; + +class VectorArithMicroInst : public VectorMicroInst +{ +protected: + VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr 
pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorArithMacroInst : public VectorMacroInst +{ + protected: + VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorMemMicroInst : public VectorMicroInst +{ + protected: + uint32_t offset; // Used to calculate EA. + Request::Flags memAccessFlags; + + VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + uint32_t _offset) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + , offset(_offset) + , memAccessFlags(0) + {} +}; + +class VectorMemMacroInst : public VectorMacroInst +{ + protected: + VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + {} +}; + +class VleMacroInst : public VectorMemMacroInst +{ + protected: + VleMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VseMacroInst : public VectorMemMacroInst +{ + protected: + VseMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VleMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + { + this->flags[IsLoad] = true; + } + + std::string generateDisassembly( + Addr pc, const 
loader::SymbolTable *symtab) const override; +}; + +class VseMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + { + this->flags[IsStore] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlWholeMacroInst : public VectorMemMacroInst +{ + protected: + VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlWholeMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsWholeMacroInst : public VectorMemMacroInst +{ + protected: + VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsWholeMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlStrideMacroInst : public VectorMemMacroInst +{ + protected: + VlStrideMacroInst(const char* mnem, 
ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlStrideMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t regIdx; + VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _regIdx, + uint8_t _microIdx, uint8_t _microVl) + : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, 0) + , regIdx(_regIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsStrideMacroInst : public VectorMemMacroInst +{ + protected: + VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsStrideMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t regIdx; + VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _regIdx, + uint8_t _microIdx, uint8_t _microVl) + : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, 0) + , regIdx(_regIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlIndexMacroInst : public VectorMemMacroInst +{ + protected: + VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlIndexMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t vdRegIdx; + uint8_t vdElemIdx; + uint8_t vs2RegIdx; + uint8_t vs2ElemIdx; + VlIndexMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t 
_vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, + 0, 0) + , vdRegIdx(_vdRegIdx), vdElemIdx(_vdElemIdx) + , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsIndexMacroInst : public VectorMemMacroInst +{ + protected: + VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsIndexMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t vs3RegIdx; + uint8_t vs3ElemIdx; + uint8_t vs2RegIdx; + uint8_t vs2ElemIdx; + VsIndexMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0) + , vs3RegIdx(_vs3RegIdx), vs3ElemIdx(_vs3ElemIdx) + , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2e5b52a879..0288f37ad8 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -500,6 +500,174 @@ decode QUADRANT default Unknown::unknown() { Fd_bits = fd.v; }}, inst_flags=FloatMemReadOp); } + + 0x0: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle8_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_ub[i] = Mem_vc.as()[i]; + } else { + Vd_ub[i] = Vs2_ub[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, 
inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + 0x0b: VlmOp::vlm_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorUnitStrideMaskLoadOp); + } + 0x1: VlIndexOp::vluxei8_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse8_v({{ + Vd_ub[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei8_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x5: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle16_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_uh[i] = Mem_vc.as()[i]; + } else { + Vd_uh[i] = Vs2_uh[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei16_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse16_v({{ + Vd_uh[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei16_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x6: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle32_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_uw[i] = Mem_vc.as()[i]; + } else 
{ + Vd_uw[i] = Vs2_uw[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei32_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse32_v({{ + Vd_uw[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei32_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x7: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle64_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_ud[i] = Mem_vc.as()[i]; + } else { + Vd_ud[i] = Vs2_ud[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei64_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse64_v({{ + Vd_ud[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei64_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } } 
0x03: decode FUNCT3 { @@ -806,6 +974,106 @@ decode QUADRANT default Unknown::unknown() { Mem_ud = Fs2_bits; }}, inst_flags=FloatMemWriteOp); } + + 0x0: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse8_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + format VsWholeOp { + 0x8: decode NF { + 0x0: vs1r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x1: vs2r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x3: vs4r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x7: vs8r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + } + } + 0x0b: VsmOp::vsm_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorUnitStrideMaskStoreOp); + } + 0x1: VsIndexOp::vsuxei8_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse8_v({{ + Mem_vc.as()[0] = Vs3_ub[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei8_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x5: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse16_v({{ + Mem_vc.as()[i] = Vs3_uh[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei16_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse16_v({{ + Mem_vc.as()[0] = Vs3_uh[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei16_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x6: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse32_v({{ + Mem_vc.as()[i] = Vs3_uw[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei32_v({{ + Mem_vc.as()[0] = 
Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse32_v({{ + Mem_vc.as()[0] = Vs3_uw[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei32_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x7: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse64_v({{ + Mem_vc.as()[i] = Vs3_ud[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei64_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse64_v({{ + Mem_vc.as()[0] = Vs3_ud[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei64_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } } 0x0b: decode FUNCT3 { diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 0f7c94da9a..4bdc3021d5 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -38,6 +38,7 @@ ##include "amo.isa" ##include "bs.isa" ##include "vector_conf.isa" +##include "vector_mem.isa" // Include formats for nonstandard extensions ##include "compressed.isa" diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa new file mode 100644 index 0000000000..113250d5cf --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -0,0 +1,205 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +let {{ + +def VMemBase(name, Name, ea_code, memacc_code, mem_flags, + inst_flags, base_class, postacc_code='', + declare_template_base=VMemMacroDeclare, + decode_template=BasicDecode, exec_template_base='', + # If it's a macroop, the corresponding microops will be + # generated. + is_macroop=True): + # Make sure flags are in lists (convert to lists if not). 
+ mem_flags = makeList(mem_flags) + inst_flags = makeList(inst_flags) + iop = InstObjParams(name, Name, base_class, + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code }, + inst_flags) + + constructTemplate = eval(exec_template_base + 'Constructor') + + header_output = declare_template_base.subst(iop) + decoder_output = '' + if declare_template_base is not VMemTemplateMacroDeclare: + decoder_output += constructTemplate.subst(iop) + else: + header_output += constructTemplate.subst(iop) + decode_block = decode_template.subst(iop) + exec_output = '' + if not is_macroop: + return (header_output, decoder_output, decode_block, exec_output) + + microiop = InstObjParams(name + '_micro', + Name + 'Micro', + exec_template_base + 'MicroInst', + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code}, + inst_flags) + + if mem_flags: + mem_flags = [ 'Request::%s' % flag for flag in mem_flags ] + s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';' + microiop.constructor += s + + microDeclTemplate = eval(exec_template_base + 'Micro' + 'Declare') + microExecTemplate = eval(exec_template_base + 'Micro' + 'Execute') + microInitTemplate = eval(exec_template_base + 'Micro' + 'InitiateAcc') + microCompTemplate = eval(exec_template_base + 'Micro' + 'CompleteAcc') + header_output = microDeclTemplate.subst(microiop) + header_output + micro_exec_output = (microExecTemplate.subst(microiop) + + microInitTemplate.subst(microiop) + + microCompTemplate.subst(microiop)) + if declare_template_base is not VMemTemplateMacroDeclare: + exec_output += micro_exec_output + else: + header_output += micro_exec_output + + return (header_output, decoder_output, decode_block, exec_output) + +}}; + +def format VleOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 
'VleMacroInst', exec_template_base='Vle') +}}; + +def format VseOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VseMacroInst', exec_template_base='Vse') +}}; + +def format VlmOp( + memacc_code, + ea_code={{ EA = Rs1; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VleMacroInst', exec_template_base='Vlm', is_macroop=False) +}}; + +def format VsmOp( + memacc_code, + ea_code={{ EA = Rs1; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VseMacroInst', exec_template_base='Vsm', is_macroop=False) +}}; + +def format VlWholeOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlWholeMacroInst', exec_template_base='VlWhole') +}}; + +def format VsWholeOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsWholeMacroInst', exec_template_base='VsWhole') +}}; + +def format VlStrideOp( + memacc_code, + ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlStrideMacroInst', exec_template_base='VlStride') +}}; + +def format VsStrideOp( + memacc_code, + ea_code={{ EA = Rs1 + Rs2 * 
(regIdx * VLENB / elem_size + microIdx); }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsStrideMacroInst', exec_template_base='VsStride') +}}; + +def format VlIndexOp( + memacc_code, + ea_code, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlIndexMacroInst', exec_template_base='VlIndex', + declare_template_base=VMemTemplateMacroDeclare, + decode_template=VMemTemplateDecodeBlock + ) +}}; + +def format VsIndexOp( + memacc_code, + ea_code, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsIndexMacroInst', exec_template_base='VsIndex', + declare_template_base=VMemTemplateMacroDeclare, + decode_template=VMemTemplateDecodeBlock + ) +}}; diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index 1d544f40ed..76f2388faf 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -46,6 +46,7 @@ output header {{ #include #include +#include "arch/generic/memhelpers.hh" #include "arch/riscv/decoder.hh" #include "arch/riscv/insts/amo.hh" #include "arch/riscv/insts/bs.hh" @@ -55,6 +56,7 @@ output header {{ #include "arch/riscv/insts/standard.hh" #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/insts/unknown.hh" +#include "arch/riscv/insts/vector.hh" #include "arch/riscv/interrupts.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" @@ -68,9 +70,15 @@ output decoder {{ #include #include +/* riscv softfloat library */ +#include +#include +#include + #include "arch/riscv/decoder.hh" #include "arch/riscv/faults.hh" #include "arch/riscv/mmu.hh" +#include "arch/riscv/regs/float.hh" #include "base/cprintf.hh" #include 
"base/loader/symtab.hh" #include "cpu/thread_context.hh" diff --git a/src/arch/riscv/isa/main.isa b/src/arch/riscv/isa/main.isa index 24f366b00c..2923a965da 100644 --- a/src/arch/riscv/isa/main.isa +++ b/src/arch/riscv/isa/main.isa @@ -50,6 +50,9 @@ namespace RiscvISA; //Include the operand_types and operand definitions ##include "operands.isa" +//Include the definitions for the instruction templates +##include "templates/templates.isa" + //Include the definitions for the instruction formats ##include "formats/formats.isa" diff --git a/src/arch/riscv/isa/templates/templates.isa b/src/arch/riscv/isa/templates/templates.isa new file mode 100644 index 0000000000..b4de46d846 --- /dev/null +++ b/src/arch/riscv/isa/templates/templates.isa @@ -0,0 +1,2 @@ +// Include +##include "vector_mem.isa" diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa new file mode 100644 index 0000000000..d54243ad7d --- /dev/null +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -0,0 +1,1349 @@ +def template VMemMacroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMemTemplateMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VleConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new 
VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VleMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, + _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; + +}; + +}}; + +def template VleMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); + Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, + byte_enable); + if (fault != NoFault) + return 
fault; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_elems = VLEN / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { + ei = i + micro_vlmax * microIdx; + %(memacc_code)s; + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VleMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); + Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, + byte_enable); + return fault; +} + +}}; + +def template VleMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_elems = VLEN / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { + ei = i + micro_vlmax * microIdx; + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VseConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl 
> 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + this->microops.push_back(microop); + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); +} + +}}; + +def template VseMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsVector] = true; + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VseMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t eewb = width_EEW(machInst.width) / 8; + const size_t mem_size = eewb * microVl; + std::vector byte_enable(mem_size, false); + size_t ei; + for (size_t i = 0; i < microVl; i++) { + ei = i + micro_vlmax * microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + auto it = byte_enable.begin() + 
i * eewb; + std::fill(it, it + eewb, true); + } + } + + Fault fault; + fault = xc->writeMem(Mem.as(), mem_size, EA, memAccessFlags, + nullptr, byte_enable); + return fault; +} + +}}; + +def template VseMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t eewb = width_EEW(machInst.width) / 8; + const size_t mem_size = eewb * microVl; + std::vector byte_enable(mem_size, false); + size_t ei; + for (size_t i = 0; i < microVl; i++) { + ei = i + micro_vlmax * microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + auto it = byte_enable.begin() + i * eewb; + std::fill(it, it + eewb, true); + } + } + + Fault fault; + fault = xc->writeMem(Mem.as(), mem_size, EA, memAccessFlags, + nullptr, byte_enable); + return fault; +} + +}}; + +def template VseMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlmConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + } else { + microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + } + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsmConstructor {{ + 
+%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; + + StaticInstPtr microop; + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + } else { + microop = new Vse8_vMicro(_machInst, micro_vl, 0); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + } + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsWholeConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + size_t NFIELDS = machInst.nf + 1; + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + StaticInstPtr microop; + for (int i = 0; i < NFIELDS; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsWholeMicroDeclare {{ + +class %(class_name)s: public %(base_class)s +{ +private: + RegId destRegIdxArr[0]; + RegId srcRegIdxArr[2]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + this->flags[IsVector] = true; + this->flags[IsStore] = true; + } + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const 
override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + for (size_t i = 0; i < VLENB; i++) { + %(memacc_code)s; + } + + Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem), + EA, memAccessFlags, nullptr); + return fault; +} + +}}; + +def template VsWholeMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + for (size_t i = 0; i < VLENB; i++) { + %(memacc_code)s; + } + + Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem), + EA, memAccessFlags, nullptr); + return fault; +} + +}}; + +def template VsWholeMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlWholeConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + size_t NFIELDS = machInst.nf + 1; + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + StaticInstPtr microop; + for (int i = 0; i < NFIELDS; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VlWholeMicroDeclare {{ + +class %(class_name)s: public %(base_class)s +{ +private: + RegId destRegIdxArr[1]; + RegId srcRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : 
%(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + this->flags[IsVector] = true; + this->flags[IsLoad] = true; + } + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + Fault fault = readMemAtomicLE(xc, traceData, EA, + *(vreg_t::Container*)(&Mem), memAccessFlags); + if (fault != NoFault) + return fault; + + size_t elem_per_reg = VLEN / width_EEW(machInst.width); + for (size_t i = 0; i < elem_per_reg; i++) { + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VlWholeMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + return fault; +} + +}}; + +def template VlWholeMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + + size_t elem_per_reg = VLEN / width_EEW(machInst.width); + for (size_t i = 0; i < elem_per_reg; ++i) { + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VlStrideConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", 
_machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + int32_t remaining_vl = this->vl; + // Num of elems in one vreg + int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; micro_vl > 0; ++i) { + for (int j = 0; j < micro_vl; ++j) { + microop = new %(class_name)sMicro(machInst, i, j, micro_vl); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + remaining_vl -= num_elems_per_vreg; + micro_vl = std::min(remaining_vl, num_elems_per_vreg); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VlStrideMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, rs2, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint8_t _microVl) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const 
override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlStrideMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + fault = xc->readMem(EA, Mem.as(), mem_size, + memAccessFlags, byte_enable); + if (fault != NoFault) + return fault; + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VlStrideMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + bool need_load = machInst.vm || elem_mask(v0, ei); + const std::vector byte_enable(mem_size, need_load); + fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + return fault; +} + +}}; + +def template VlStrideMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + constexpr uint8_t elem_size = sizeof(Vd[0]); + + RiscvISA::vreg_t old_vd; + decltype(Vd) old_Vd = nullptr; + // We treat agnostic as undistrubed + 
xc->getRegOperand(this, 2, &old_vd); + old_Vd = old_vd.as >(); + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + if (microIdx == 0) { + // treat vma as vmu + // if (machInst.vtype8.vma == 0) + memcpy(Vd, old_Vd, microVl * elem_size); + // treat vta as vtu + // if (machInst.vtype8.vta == 0) + memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + } else { + memcpy(Vd, old_Vd, VLENB); + } + + size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VsStrideConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + int32_t remaining_vl = this->vl; + // Num of elems in one vreg + int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; micro_vl > 0; ++i) { + for (int j = 0; j < micro_vl; ++j) { + microop = new %(class_name)sMicro(machInst, i, j, micro_vl); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + remaining_vl -= num_elems_per_vreg; + micro_vl = std::min(remaining_vl, num_elems_per_vreg); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VsStrideMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, rs2, vs3, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[0]; +public: + 
%(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint8_t _microVl) + : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsStrideMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + %(ea_code)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsStrideMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Fault fault = NoFault; + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + %(ea_code)s; + + uint32_t 
mem_size = elem_size; + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + bool need_store = machInst.vm || elem_mask(v0, ei); + if (need_store) { + const std::vector byte_enable(mem_size, need_store); + %(memacc_code)s; + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsStrideMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlIndexConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t vd_eewb = sizeof(ElemType); + const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; + const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; + const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; + const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; micro_vl > 0; i++) { + for (uint8_t j = 0; j < micro_vl; ++j) { + uint8_t vdRegIdx = i / vd_split_num; + uint8_t vs2RegIdx = i / vs2_split_num; + uint8_t vdElemIdx = j + micro_vlmax * (i % vd_split_num); + uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num); + microop = new %(class_name)sMicro(machInst, + vdRegIdx, vdElemIdx, vs2RegIdx, vs2ElemIdx); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + remaining_vl -= micro_vlmax; + micro_vl = std::min(remaining_vl, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + 
+}}; + +def template VlIndexMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, vs2, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlIndexMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData)const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + fault = xc->readMem(EA, Mem.as(), mem_size, + memAccessFlags, byte_enable); + if (fault != NoFault) + return fault; + 
%(memacc_code)s; /* Vd[this->vdElemIdx] = Mem[0]; */ + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VlIndexMicroInitiateAcc {{ + +template +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + bool need_load = machInst.vm || elem_mask(v0, ei); + const std::vector byte_enable(mem_size, need_load); + fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + return fault; +} + +}}; + +def template VlIndexMicroCompleteAcc {{ + +template +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const +{ + using vu = std::make_unsigned_t; + %(op_decl)s; + %(op_rd)s; + + constexpr uint8_t elem_size = sizeof(Vd[0]); + + RiscvISA::vreg_t old_vd; + decltype(Vd) old_Vd = nullptr; + // We treat agnostic as undistrubed + xc->getRegOperand(this, 2, &old_vd); + old_Vd = old_vd.as >(); + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + memcpy(Vd, old_Vd, VLENB); + + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VsIndexConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const 
uint32_t vs3_eewb = sizeof(ElemType); + const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; + const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; + const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; + const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; micro_vl > 0; i++) { + for (uint8_t j = 0; j < micro_vl; ++j) { + uint8_t vs3RegIdx = i / vs3_split_num; + uint8_t vs2RegIdx = i / vs2_split_num; + uint8_t vs3ElemIdx = j + micro_vlmax * (i % vs3_split_num); + uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num); + microop = new %(class_name)sMicro(machInst, + vs3RegIdx, vs3ElemIdx, vs2RegIdx, vs2ElemIdx); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + remaining_vl -= micro_vlmax; + micro_vl = std::min(remaining_vl, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VsIndexMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, vs2, vs3, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]); + 
if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsIndexMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData)const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsIndexMicroInitiateAcc {{ + +template +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + constexpr uint8_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, 
byte_enable); + } + return fault; +} + +}}; + +def template VsIndexMicroCompleteAcc {{ + +template +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VMemTemplateDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s(machInst); + } + case 0b001: { + return new %(class_name)s(machInst); + } + case 0b010: { + return new %(class_name)s(machInst); + } + case 0b011: { + return new %(class_name)s(machInst); + } + default: GEM5_UNREACHABLE; +} + +}}; diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index e0a8494ece..1db6d6df3b 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -241,6 +241,61 @@ remu(T rs1, T rs2) return (rs2 == 0) ? rs1 : rs1 % rs2; } +/* +* Encode LMUL to lmul as follows: +* LMUL vlmul lmul +* 1 000 0 +* 2 001 1 +* 4 010 2 +* 8 011 3 +* - 100 - +* 1/8 101 -3 +* 1/4 110 -2 +* 1/2 111 -1 +* +* then, we can calculate VLMAX = vlen >> (vsew + 3 - lmul) +* e.g. vlen = 256 bits, SEW = 16, LMUL = 1/8 +* => VLMAX = vlen >> (1 + 3 - (-3)) +* = 256 >> 7 +* = 2 +* Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h +*/ +inline uint64_t +vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +{ + int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); + lmul = per_reg ? 
std::min(0, lmul) : lmul; + int64_t vsew = bits(vtype, 5, 3); + return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); +} + +inline uint64_t +width_EEW(uint64_t width) +{ + switch (width) { + case 0b000: return 8; + case 0b101: return 16; + case 0b110: return 32; + case 0b111: return 64; + default: GEM5_UNREACHABLE; + } +} + +/* + * Spec Section 4.5 + * Ref: + * https://github.com/qemu/qemu/blob/c7d773ae/target/riscv/vector_helper.c +*/ +template +inline int +elem_mask(const T* vs, const int index) +{ + static_assert(std::is_integral_v); + int idx = index / (sizeof(T)*8); + int pos = index % (sizeof(T)*8); + return (vs[idx] >> pos) & 1; +} + } // namespace RiscvISA } // namespace gem5