From 57e0ba7765d385ba4983747f2d27e0c8379a8460 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Wed, 9 Aug 2023 12:30:13 +0200 Subject: [PATCH 1/7] arch-riscv: Define VecRegContainer with maximum expected length This path redefine VecRegContainer for RISCV so it can hold every VLEN + ELEN possible configuration used at execution time Change-Id: Ie6abd01a1c4ebe9aae3d93f4e835fcfdc4a82dcd --- src/arch/riscv/regs/vector.hh | 8 +++----- src/arch/riscv/types.hh | 4 ++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/arch/riscv/regs/vector.hh b/src/arch/riscv/regs/vector.hh index 388e1cb78d..60c840395f 100644 --- a/src/arch/riscv/regs/vector.hh +++ b/src/arch/riscv/regs/vector.hh @@ -36,6 +36,7 @@ #include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" +#include "arch/riscv/types.hh" #include "base/bitunion.hh" #include "cpu/reg_class.hh" #include "debug/VecRegs.hh" @@ -46,13 +47,10 @@ namespace gem5 namespace RiscvISA { -constexpr unsigned ELEN = 64; -constexpr unsigned VLEN = 256; -constexpr unsigned VLENB = VLEN / 8; - -using VecRegContainer = gem5::VecRegContainer; +using VecRegContainer = gem5::VecRegContainer; using vreg_t = VecRegContainer; + const int NumVecStandardRegs = 32; const int NumVecInternalRegs = 8; // Used by vector uop const int NumVecRegs = NumVecStandardRegs + NumVecInternalRegs; diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index 1d501dc05f..01c600d148 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -178,6 +178,10 @@ BitUnion64(ExtMachInst) EndBitUnion(ExtMachInst) +constexpr unsigned MaxVecLenInBits = 65536; +constexpr unsigned MaxVecLenInBytes = MaxVecLenInBits >> 3; + + } // namespace RiscvISA } // namespace gem5 From 5d97cb8b0b8c1e60e3d0b09e45c92bc4c89658e0 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Sat, 9 Sep 2023 12:43:28 +0200 Subject: [PATCH 2/7] arch-riscv: Define VLEN and ELEN through the ISA object This commit define VLEN and ELEN values as parameters of 
the RiscvISA class. Change-Id: Ic5b80397d316522d729e4db4f906aa189f27a491 --- src/arch/riscv/RiscvISA.py | 38 +++++++++++++++++++++++++++++++++++++- src/arch/riscv/isa.cc | 13 +++++++++---- src/arch/riscv/isa.hh | 17 ++++++++++++++++- src/arch/riscv/pcstate.hh | 4 ++-- 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py index f66171a95a..3f123405e9 100644 --- a/src/arch/riscv/RiscvISA.py +++ b/src/arch/riscv/RiscvISA.py @@ -38,11 +38,37 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from m5.params import Enum +from m5.params import Enum, UInt32 from m5.params import Param from m5.objects.BaseISA import BaseISA +class RiscvVectorLength(UInt32): + min = 8 + max = 65536 + + def _check(self): + super()._check() + + # VLEN needs to be a whole power of 2. We already know value is + # not zero. Hence: + if self.value & (self.value - 1) != 0: + raise TypeError("VLEN is not a power of 2: %d" % self.value) + + +class RiscvVectorElementLength(UInt32): + min = 8 + max = 64 + + def _check(self): + super()._check() + + # ELEN needs to be a whole power of 2. We already know value is + # not zero. Hence: + if self.value & (self.value - 1) != 0: + raise TypeError("ELEN is not a power of 2: %d" % self.value) + + class RiscvType(Enum): vals = ["RV32", "RV64"] @@ -58,3 +84,13 @@ class RiscvISA(BaseISA): riscv_type = Param.RiscvType("RV64", "RV32 or RV64") enable_rvv = Param.Bool(True, "Enable vector extension") + vlen = Param.RiscvVectorLength( + 256, + "Length of each vector register in bits. \ + VLEN in Ch. 2 of RISC-V vector spec", + ) + elen = Param.RiscvVectorElementLength( + 64, + "Length of each vector element in bits. \ + ELEN in Ch. 
2 of RISC-V vector spec", + ) diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index a1f4056bf5..5785a14e92 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -36,6 +36,7 @@ #include #include "arch/riscv/faults.hh" +#include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/interrupts.hh" #include "arch/riscv/mmu.hh" #include "arch/riscv/pagetable.hh" @@ -253,10 +254,9 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace -ISA::ISA(const Params &p) : - BaseISA(p), _rvType(p.riscv_type), checkAlignment(p.check_alignment), - enableRvv(p.enable_rvv) - +ISA::ISA(const Params &p) :BaseISA(p), + _rvType(p.riscv_type), checkAlignment(p.check_alignment), + enableRvv(p.enable_rvv),vlen(p.vlen),elen(p.elen) { _regClasses.push_back(&intRegClass); _regClasses.push_back(&floatRegClass); @@ -267,6 +267,11 @@ ISA::ISA(const Params &p) : _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); + fatal_if( p.vlen < p.elen, + "VLEN should be greater or equal", + "than ELEN. Ch. 2RISC-V vector spec."); + + miscRegFile.resize(NUM_MISCREGS); clear(); } diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index 13366ef4c3..5581c3b677 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -84,6 +84,16 @@ class ISA : public BaseISA const Addr INVALID_RESERVATION_ADDR = (Addr)-1; std::unordered_map load_reservation_addrs; + /** Length of each vector register in bits. + * VLEN in Ch. 2 of RISC-V vector spec + */ + unsigned vlen; + + /** Length of each vector element in bits. + * ELEN in Ch. 
2 of RISC-V vector spec + */ + unsigned elen; + public: using Params = RiscvISAParams; @@ -92,7 +102,8 @@ class ISA : public BaseISA PCStateBase* newPCState(Addr new_inst_addr=0) const override { - return new PCState(new_inst_addr, _rvType, VLENB); + unsigned vlenb = vlen >> 3; + return new PCState(new_inst_addr, _rvType, vlenb); } public: @@ -147,6 +158,10 @@ class ISA : public BaseISA Addr& load_reservation_addr = load_reservation_addrs[cid]; load_reservation_addr = INVALID_RESERVATION_ADDR; } + /** Methods for getting VLEN, VLENB and ELEN values */ + unsigned getVecLenInBits() { return vlen; } + unsigned getVecLenInBytes() { return vlen >> 3; } + unsigned getVecElemLenInBits() { return elen; } }; } // namespace RiscvISA diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh index 918e85708b..03a7fc415f 100644 --- a/src/arch/riscv/pcstate.hh +++ b/src/arch/riscv/pcstate.hh @@ -62,7 +62,7 @@ class PCState : public GenericISA::UPCState<4> bool _compressed = false; RiscvType _rvType = RV64; - uint64_t _vlenb = VLENB; + uint64_t _vlenb = 256; VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial; uint32_t _vl = 0; @@ -74,7 +74,7 @@ class PCState : public GenericISA::UPCState<4> PCState &operator=(const PCState &other) = default; PCState() = default; explicit PCState(Addr addr) { set(addr); } - explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb = VLENB) + explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb) { set(addr); _rvType = rvType; From 8a20f20f791a316c5543af2e5c45bcca94bc3667 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Thu, 14 Sep 2023 09:43:43 +0200 Subject: [PATCH 3/7] arch-riscv: Add vlen component to decoder state This patch add vlen definition to the riscv decoder so it can be used in Vector Instruction Constructors Change-Id: I52292bc261c43562b690062b16d2b323675c2fe0 --- src/arch/riscv/decoder.cc | 2 ++ src/arch/riscv/decoder.hh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/arch/riscv/decoder.cc 
b/src/arch/riscv/decoder.cc index b1e2948e93..3c199b3210 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -41,6 +41,8 @@ namespace RiscvISA Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst) { + ISA *isa = dynamic_cast(p.isa); + vlen = isa->getVecLenInBits(); reset(); } diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index c827e85f90..b53c48445d 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -60,6 +60,8 @@ class Decoder : public InstDecoder ExtMachInst emi; uint32_t machInst; + uint32_t vlen; + virtual StaticInstPtr decodeInst(ExtMachInst mach_inst); /// Decode a machine instruction. From 2c9fca7b607dc5bfcc0dcf47141eeafa1f1aadc4 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Wed, 9 Aug 2023 12:39:03 +0200 Subject: [PATCH 4/7] arch-riscv: Add vlen configuration to vector instructions In first place, vlen is added as a member of Vector Macro Instructions where it is needed to split the instruction in Micro Instructions. Then, new PCState methods are used to get dynamic vlen and vlenb values at execution. Finally, vector length data types are fixed to 32 bits so every vlen value is considered. 
Change-Id: I5b8ceb0d291f456a30a4b0ae2f58601231d33a7a --- src/arch/riscv/insts/vector.cc | 12 +- src/arch/riscv/insts/vector.hh | 128 ++++---- src/arch/riscv/isa.cc | 3 + src/arch/riscv/isa/decoder.isa | 39 ++- src/arch/riscv/isa/formats/vector_arith.isa | 94 +++++- src/arch/riscv/isa/formats/vector_conf.isa | 3 +- src/arch/riscv/isa/formats/vector_mem.isa | 39 ++- src/arch/riscv/isa/templates/vector_arith.isa | 301 +++++++++++------- src/arch/riscv/isa/templates/vector_mem.isa | 218 ++++++++----- src/arch/riscv/pcstate.hh | 2 +- src/arch/riscv/types.hh | 1 - src/arch/riscv/utility.hh | 5 +- 12 files changed, 567 insertions(+), 278 deletions(-) diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 6ecec44dc5..c99e806e9b 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -215,8 +215,9 @@ std::string VleMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " << registerName(srcRegIdx(1)); if (!machInst.vm) ss << ", v0.t"; return ss.str(); @@ -226,8 +227,9 @@ std::string VlWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } @@ -235,8 +237,9 @@ std::string VseMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * 
microIdx << '(' << registerName(srcRegIdx(0)) << ')'; if (!machInst.vm) ss << ", v0.t"; return ss.str(); } @@ -245,8 +248,9 @@ std::string VsWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index b25a6e3a09..58a76e0ab1 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -32,6 +32,7 @@ #include #include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/isa.hh" #include "arch/riscv/regs/misc.hh" #include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" @@ -116,11 +117,14 @@ class VectorMacroInst : public RiscvMacroInst protected: uint32_t vl; uint8_t vtype; + uint32_t vlen; + VectorMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) + OpClass __opClass, uint32_t _vlen = 256) : RiscvMacroInst(mnem, _machInst, __opClass), vl(_machInst.vl), - vtype(_machInst.vtype8) + vtype(_machInst.vtype8), + vlen(_vlen) { this->flags[IsVector] = true; } @@ -128,13 +132,15 @@ class VectorMacroInst : public RiscvMacroInst class VectorMicroInst : public RiscvMicroInst { - protected: - uint8_t microVl; +protected: + uint32_t vlen; + uint32_t microVl; uint8_t microIdx; uint8_t vtype; VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen = 256) : RiscvMicroInst(mnem, _machInst, __opClass), + vlen(_vlen), microVl(_microVl), microIdx(_microIdx), vtype(_machInst.vtype8) @@ -169,7 +175,7 @@ class VectorArithMicroInst : public VectorMicroInst { protected: VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, 
uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -182,12 +188,11 @@ class VectorArithMacroInst : public VectorMacroInst { protected: VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } - std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; }; @@ -196,7 +201,7 @@ class VectorVMUNARY0MicroInst : public VectorMicroInst { protected: VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -209,8 +214,8 @@ class VectorVMUNARY0MacroInst : public VectorMacroInst { protected: VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -223,8 +228,8 @@ class VectorSlideMacroInst : public VectorMacroInst { protected: VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -239,7 +244,7 @@ class VectorSlideMicroInst : public VectorMicroInst uint8_t vdIdx; uint8_t vs2Idx; VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , vdIdx(_vdIdx), 
vs2Idx(_vs2Idx) @@ -256,7 +261,7 @@ class VectorMemMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint32_t _offset) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , offset(_offset) @@ -268,8 +273,8 @@ class VectorMemMacroInst : public VectorMacroInst { protected: VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) {} }; @@ -277,8 +282,8 @@ class VleMacroInst : public VectorMemMacroInst { protected: VleMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -289,8 +294,8 @@ class VseMacroInst : public VectorMemMacroInst { protected: VseMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -302,9 +307,10 @@ class VleMicroInst : public VectorMicroInst protected: Request::Flags memAccessFlags; - VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + VleMicroInst(const char *mnem, ExtMachInst _machInst,OpClass __opClass, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsLoad] = true; } @@ -319,8 +325,9 @@ class VseMicroInst : public VectorMicroInst Request::Flags 
memAccessFlags; VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsStore] = true; } @@ -333,8 +340,8 @@ class VlWholeMacroInst : public VectorMemMacroInst { protected: VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -347,8 +354,10 @@ class VlWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, + uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -359,8 +368,8 @@ class VsWholeMacroInst : public VectorMemMacroInst { protected: VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -373,8 +382,10 @@ class VsWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx) + OpClass __opClass, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass , _microVl, + _microIdx, _vlen) {} std::string 
generateDisassembly( @@ -385,8 +396,8 @@ class VlStrideMacroInst : public VectorMemMacroInst { protected: VlStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -399,7 +410,7 @@ class VlStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -413,8 +424,8 @@ class VsStrideMacroInst : public VectorMemMacroInst { protected: VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -427,7 +438,7 @@ class VsStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -441,8 +452,8 @@ class VlIndexMacroInst : public VectorMemMacroInst { protected: VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -473,8 +484,8 @@ class VsIndexMacroInst : public VectorMemMacroInst { protected: VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass 
__opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -516,7 +527,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst { protected: VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -533,10 +544,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst RegId destRegIdxArr[1]; public: - VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, - uint8_t _numSrcs) + uint32_t vlen; + VMaskMergeMicroInst(ExtMachInst extMachInst, + uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen) : VectorArithMicroInst("vmask_mv_micro", extMachInst, - VectorIntegerArithOp, 0, 0) + VectorIntegerArithOp, 0, 0), + vlen(_vlen) { setRegIdxArrays( reinterpret_cast( @@ -558,26 +571,28 @@ class VMaskMergeMicroInst : public VectorArithMicroInst execute(ExecContext* xc, trace::InstRecord* traceData) const override { vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + PCStateBase *pc_ptr = xc->tcBase()->pcState().clone(); auto Vd = tmp_d0.as(); - constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + uint32_t vlenb = pc_ptr->as().vlenb(); + const uint32_t elems_per_vreg = vlenb / sizeof(ElemType); size_t bit_cnt = elems_per_vreg; vreg_t tmp_s; xc->getRegOperand(this, 0, &tmp_s); auto s = tmp_s.as(); // cp the first result and tail - memcpy(Vd, s, VLENB); + memcpy(Vd, s, vlenb); for (uint8_t i = 1; i < this->_numSrcRegs; i++) { xc->getRegOperand(this, i, &tmp_s); s = tmp_s.as(); - if constexpr (elems_per_vreg < 8) { - constexpr uint8_t m = (1 << elems_per_vreg) - 1; - const uint8_t mask = m << (i * elems_per_vreg % 8); + if (elems_per_vreg < 8) { + const uint32_t m = (1 << elems_per_vreg) - 1; + const uint32_t mask = m << (i * elems_per_vreg % 8); // clr & ext bits Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; 
Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; bit_cnt += elems_per_vreg; } else { - constexpr uint8_t byte_offset = elems_per_vreg / 8; + const uint32_t byte_offset = elems_per_vreg / 8; memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); } } @@ -595,7 +610,8 @@ class VMaskMergeMicroInst : public VectorArithMicroInst for (uint8_t i = 0; i < this->_numSrcRegs; i++) { ss << ", " << registerName(srcRegIdx(i)); } - ss << ", offset:" << VLENB / sizeof(ElemType); + unsigned vlenb = vlen >> 3; + ss << ", offset:" << vlenb / sizeof(ElemType); return ss.str(); } }; diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 5785a14e92..877b795551 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -271,6 +271,9 @@ ISA::ISA(const Params &p) :BaseISA(p), "VLEN should be greater or equal", "than ELEN. Ch. 2RISC-V vector spec."); + inform("RVV enabled, VLEN = %d bits, ELEN = %d bits", + p.vlen, p.elen); + miscRegFile.resize(NUM_MISCREGS); clear(); diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2bd3d33a7e..3d1d396165 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -3110,21 +3110,33 @@ decode QUADRANT default Unknown::unknown() { 0x12: decode VS1 { format VectorIntExtFormat { 0x02: vzext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x03: vsext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x04: vzext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x05: vsext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x06: vzext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x07: vsext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = 
Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); } @@ -3185,7 +3197,8 @@ decode QUADRANT default Unknown::unknown() { auto Vs2bit = tmp_s2.as(); for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i + - vtype_VLMAX(vtype, true) * this->microIdx; + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; bool vs2_lsb = elem_mask(Vs2bit, ei); bool do_mask = elem_mask(v0, ei); bool has_one = false; @@ -3406,7 +3419,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0e: VectorSlideUpFormat::vslideup_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3431,7 +3445,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3662,7 +3677,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslideup_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3687,7 +3703,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int 
microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3964,7 +3981,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3994,7 +4012,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPFVF, VectorMiscOp); 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -4239,7 +4258,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslide1up_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -4269,7 +4289,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslide1down_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); diff --git a/src/arch/riscv/isa/formats/vector_arith.isa 
b/src/arch/riscv/isa/formats/vector_arith.isa index c462e6c8d4..0d5055ea8f 100644 --- a/src/arch/riscv/isa/formats/vector_arith.isa +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -28,6 +28,10 @@ let {{ + def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" + def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" def setDestWrapper(destRegId): return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \ "_numTypedDestRegs[VecRegClass]++;\n" @@ -67,7 +71,7 @@ let {{ ''' + code else: return ''' - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx; ''' + code def wideningOpRegisterConstraintChecks(code): @@ -178,12 +182,15 @@ def format VectorIntFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -225,12 +232,17 @@ def format VectorIntExtFormat(code, category, *flags) {{ code = loopWrapper(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'ext_div': ext_div}, @@ -293,12 +305,17 @@ def format VectorIntWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 
+ 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -348,12 +365,17 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ code = narrowingOpRegisterConstraintChecks(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), }, @@ -416,12 +438,15 @@ def format VectorIntMaskFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -474,12 +499,17 @@ def format VectorGatherFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'idx_type': idx_type}, @@ -537,12 +567,15 @@ def format VectorFloatFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -581,12 +614,15 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ 
vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -649,12 +685,17 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -693,12 +734,17 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -738,12 +784,17 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -783,6 +834,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() code = maskCondWrapper(code) code = 
eiDeclarePrefix(code) @@ -795,6 +847,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -816,7 +869,8 @@ def format VMvWholeFormat(code, category, *flags) {{ microiop = InstObjParams(name + "_micro", Name + "Micro", 'VMvWholeMicroInst', - {'code': code}, + {'code': code, + 'set_vlen': setVlen()}, flags) header_output = \ @@ -847,6 +901,7 @@ def format ViotaFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() microiop = InstObjParams(name+"_micro", Name+"Micro", @@ -854,6 +909,7 @@ def format ViotaFormat(code, category, *flags){{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -885,12 +941,14 @@ def format Vector1Vs1VdMaskFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() iop = InstObjParams(name, Name, 'VectorNonSplitInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -946,10 +1004,10 @@ def format VectorNonSplitFormat(code, category, *flags) {{ if inst_name == "vfmv" : execute_block = VectorFloatNonSplitExecute.subst(iop) - decode_block = VectorFloatDecodeBlock.subst(iop) + decode_block = VectorFloatNonSplitDecodeBlock.subst(iop) elif inst_name == "vmv" : execute_block = VectorIntNonSplitExecute.subst(iop) - decode_block = VectorIntDecodeBlock.subst(iop) + decode_block = VectorIntNonSplitDecodeBlock.subst(iop) else : error("Unsupported inst for 
VectorNonSplitFormat: %s" % inst_name) @@ -984,6 +1042,8 @@ def format VectorMaskFormat(code, category, *flags) {{ set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_vlenb = setVlenb() + code = loopWrapper(code, micro_inst = False) iop = InstObjParams(name, @@ -992,6 +1052,7 @@ def format VectorMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) # Because of the use of templates, we had to put all parts in header to @@ -1020,6 +1081,9 @@ def format VectorReduceIntFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using vu [[maybe_unused]] = std::make_unsigned_t; using vi [[maybe_unused]] = std::make_signed_t; @@ -1030,6 +1094,8 @@ def format VectorReduceIntFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, + 'set_vlen' : set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1062,6 +1128,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using et = ElemType; using vu = decltype(et::v); @@ -1075,6 +1144,8 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1107,6 +1178,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx 
+= setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() type_def = ''' using et = ElemType; using vu [[maybe_unused]] = decltype(et::v); @@ -1119,6 +1192,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1162,6 +1237,8 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + code = maskCondWrapper(code) code = eiDeclarePrefix(code) code = loopWrapper(code) @@ -1172,6 +1249,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -1204,12 +1282,16 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -1261,12 +1343,16 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_src_reg_idx += setSrcVm() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 
'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa index 457c5ce40d..b997dbec97 100644 --- a/src/arch/riscv/isa/formats/vector_conf.isa +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -157,7 +157,8 @@ def template VConfExecute {{ tc->setMiscReg(MISCREG_VSTART, 0); - VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, vlen); + VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, + vlen); vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen); uint32_t new_vl = getNewVL( current_vl, requested_vl, vlmax, rd_bits, rs1_bits); diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa index 113250d5cf..da53d80d0a 100644 --- a/src/arch/riscv/isa/formats/vector_mem.isa +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -29,10 +29,15 @@ let {{ +def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" +def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" + def VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, base_class, postacc_code='', declare_template_base=VMemMacroDeclare, - decode_template=BasicDecode, exec_template_base='', + decode_template=VMemBaseDecodeBlock, exec_template_base='', # If it's a macroop, the corresponding microops will be # generated. 
is_macroop=True): @@ -63,7 +68,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, exec_template_base + 'MicroInst', {'ea_code': ea_code, 'memacc_code': memacc_code, - 'postacc_code': postacc_code}, + 'postacc_code': postacc_code, + 'set_vlenb': setVlenb(), + 'set_vlen': setVlen()}, inst_flags) if mem_flags: @@ -90,7 +97,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, def format VleOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -101,7 +110,9 @@ def format VleOp( def format VseOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -134,7 +145,9 @@ def format VsmOp( def format VlWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -145,7 +158,9 @@ def format VlWholeOp( def format VsWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -156,7 +171,9 @@ def format VsWholeOp( def format VlStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -167,7 +184,9 @@ def format VlStrideOp( def format VsStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -186,7 +205,7 @@ def format VlIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VlIndexMacroInst', exec_template_base='VlIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; @@ 
-200,6 +219,6 @@ def format VsIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VsIndexMacroInst', exec_template_base='VsIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 9b5ee0e7fa..306b1c53f1 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -35,8 +35,8 @@ output header {{ [[maybe_unused]] RiscvISA::vreg_t old_vd; \ [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ xc->getRegOperand(this, (idx), &old_vd); \ - old_Vd = old_vd.as >(); \ - memcpy(Vd, old_Vd, VLENB); + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, vlenb); #define VRM_REQUIRED \ uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ @@ -73,7 +73,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -82,14 +82,14 @@ public: def template VectorIntMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -121,7 +121,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, 
uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -133,7 +133,7 @@ def template VectorIntMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -173,6 +173,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -190,7 +191,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -214,7 +215,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, @@ -254,13 +255,16 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); auto SEW = vtype_SEW(vtype); - auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + auto index = (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { case 8: { using vext [[maybe_unused]] = int8_t; using vextu [[maybe_unused]] = uint8_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -272,6 +276,8 @@ Fault using vextu [[maybe_unused]] = uint16_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -283,6 +289,8 @@ Fault using vextu [[maybe_unused]] = uint32_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -300,10 +308,10 @@ Fault def template VectorIntDecodeBlock {{ 
switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -316,7 +324,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -325,8 +333,8 @@ public: def template VectorIntWideningMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -337,7 +345,7 @@ template const uint32_t num_microops = 1 << std::max(0, vlmul + 1); int32_t tmp_vl = this->vl; - const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? 
t_micro_vlmax : t_micro_vlmax / 2; int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -370,7 +378,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -382,7 +390,7 @@ def template VectorIntWideningMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -415,6 +423,10 @@ Fault return std::make_shared( "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -423,13 +435,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -459,6 +469,11 @@ Fault "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -466,13 +481,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? 
t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -485,9 +498,9 @@ Fault def template VectorIntWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -500,7 +513,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -508,14 +521,14 @@ public: def template VectorFloatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -547,7 +560,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -557,7 +570,7 @@ public: def template VectorFloatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t 
_microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -598,6 +611,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -611,8 +625,8 @@ Fault def template VectorFloatDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -625,7 +639,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -650,7 +664,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override @@ -693,14 +707,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -737,14 +754,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -757,7 +777,7 @@ Fault def template VectorFloatWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -771,7 +791,7 @@ private: int cnt = 0; %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -781,14 +801,14 @@ public: def template ViotaMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -819,7 +839,7 @@ private: bool vm; int* cnt; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, int* cnt); Fault execute(ExecContext* xc, trace::InstRecord* 
traceData)const override; using %(base_class)s::generateDisassembly; @@ -831,7 +851,7 @@ def template ViotaMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, int* cnt) + uint32_t _microVl, uint8_t _microIdx, int* cnt) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -871,6 +891,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -919,6 +940,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -997,7 +1019,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1006,14 +1028,14 @@ public: def template VectorIntMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1028,7 +1050,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), _vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1050,7 +1072,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* 
xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1061,7 +1083,7 @@ def template VectorIntMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1099,10 +1121,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1119,7 +1142,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1128,14 +1151,14 @@ public: def template VectorFloatMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1150,7 +1173,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), _vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1171,7 +1194,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t 
_microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1182,7 +1205,7 @@ def template VectorFloatMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1220,10 +1243,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1276,7 +1300,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1287,7 +1311,7 @@ public: def template VMvWholeMicroConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1321,7 +1345,8 @@ Fault %(op_decl)s; %(op_rd)s; - for (size_t i = 0; i < (VLEN / 64); i++) { + %(set_vlen)s; + for (size_t i = 0; i < (vlen / 64); i++) { %(code)s; } %(op_wb)s; @@ -1382,6 +1407,7 @@ Fault %(op_decl)s; %(op_rd)s; // TODO: remove it + %(set_vlenb)s; %(copy_old_vd)s; %(code)s; %(op_wb)s; @@ -1489,6 +1515,28 @@ Fault }}; +def template VectorFloatNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { 
+case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + def template VectorReduceMacroDeclare {{ template @@ -1496,7 +1544,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1505,14 +1553,14 @@ public: def template VectorReduceMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1544,7 +1592,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1555,7 +1603,7 @@ def template VectorReduceMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1593,6 +1641,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1600,7 +1650,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* 
vs2) { ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { microop_result = f(microop_result, Vs2[i]); } @@ -1625,6 +1676,7 @@ Fault %(type_def)s; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -1638,6 +1690,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1647,7 +1701,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1685,6 +1740,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1694,7 +1751,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1716,7 +1774,7 @@ class %(class_name)s : public %(base_class)s{ private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1725,8 +1783,9 @@ public: def template VectorGatherMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) 
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1740,7 +1799,8 @@ template const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul; const uint8_t vd_vregs = vs2_vregs; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs1_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1778,7 +1838,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1789,7 +1849,7 @@ def template VectorGatherMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1839,17 +1899,19 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; - const uint32_t vlmax = vtype_VLMAX(vtype); + const uint32_t vlmax = vtype_VLMAX(vtype,vlen); constexpr uint8_t vd_eewb = sizeof(ElemType); constexpr uint8_t vs1_eewb = sizeof(IndexType); constexpr uint8_t vs2_eewb = sizeof(ElemType); constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; - [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; - [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; - [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const uint16_t vd_elems = vlenb / vd_eewb; + [[maybe_unused]] const 
uint16_t vs1_elems = vlenb / vs1_eewb; + [[maybe_unused]] const uint16_t vs2_elems = vlenb / vs2_eewb; [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; @@ -1875,19 +1937,19 @@ def template VectorGatherDecodeBlock {{ switch(machInst.vtype8.vsew) { case 0b000: { using elem_type [[maybe_unused]] = uint8_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b001: { using elem_type [[maybe_unused]] = uint16_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b010: { using elem_type [[maybe_unused]] = uint32_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b011: { using elem_type [[maybe_unused]] = uint64_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } default: GEM5_UNREACHABLE; } @@ -1902,7 +1964,7 @@ private: %(reg_idx_arr_decl)s; bool vxsat = false; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1911,14 +1973,14 @@ public: def template VectorIntVxsatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1954,7 +2016,7 @@ private: bool vm; bool* vxsatptr; public: - %(class_name)s(ExtMachInst 
_machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1966,7 +2028,7 @@ def template VectorIntVxsatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -2007,6 +2069,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -2016,7 +2080,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]); } @@ -2038,7 +2103,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -2047,14 +2112,14 @@ public: def template VectorSlideUpMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2082,14 +2147,14 @@ 
template def template VectorSlideDownMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2126,7 +2191,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -2138,7 +2203,7 @@ def template VectorSlideMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vdIdx, _vs2Idx) { @@ -2174,10 +2239,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen); + %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -2210,10 +2278,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = 
vtype_VLMAX(vtype, vlen); + %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index 2b3b9187bf..fc1b93548c 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -34,6 +34,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -48,6 +49,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -55,16 +57,17 @@ public: def template VleConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; if (micro_vl == 0) { @@ -72,7 +75,7 @@ def template VleConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -93,9 +96,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, - _microIdx) 
+ %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -127,12 +131,15 @@ Fault Addr EA; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; RiscvISA::vreg_t tmp_v0; uint8_t *v0; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -150,15 +157,18 @@ Fault } uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); if (fault != NoFault) return fault; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; %(memacc_code)s; @@ -176,10 +186,12 @@ Fault %(class_name)s::initiateAcc(ExecContext* xc, trace::InstRecord* traceData) const { + Addr EA; %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; MISA misa = xc->readMiscReg(MISCREG_ISA); @@ -192,6 +204,7 @@ Fault return std::make_shared("VILL is set", machInst); uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -208,6 +221,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -222,8 +236,9 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - 
const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; @@ -238,13 +253,13 @@ Fault def template VseConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); @@ -256,7 +271,7 @@ def template VseConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -277,9 +292,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[0]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -326,9 +342,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const 
size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -375,9 +393,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -412,20 +432,20 @@ Fault def template VlmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop = new Vle8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); } @@ -439,20 +459,20 @@ def template VlmConstructor {{ def template VsmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new 
Vse8_vMicro(_machInst, micro_vl, 0); + microop = new Vse8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); } @@ -466,18 +486,18 @@ def template VsmConstructor {{ def template VsWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -497,9 +517,10 @@ private: RegId destRegIdxArr[0]; RegId srcRegIdxArr[2]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -532,9 +553,11 @@ Fault } %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } @@ -560,9 +583,11 @@ Fault } %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } @@ -586,18 +611,19 @@ Fault def template VlWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) 
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -617,9 +643,10 @@ private: RegId destRegIdxArr[1]; RegId srcRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s_micro", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -657,6 +684,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; Fault fault = readMemAtomicLE(xc, traceData, EA, @@ -664,7 +693,7 @@ Fault if (fault != NoFault) return fault; - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; i++) { %(memacc_code)s; } @@ -690,6 +719,7 @@ Fault } %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); @@ -706,6 +736,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -713,7 +744,7 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for 
(size_t i = 0; i < elem_per_reg; ++i) { %(memacc_code)s; } @@ -726,13 +757,13 @@ Fault def template VlStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -770,7 +801,7 @@ private: RegId destRegIdxArr[1]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _regIdx, _microIdx, _microVl) { @@ -820,6 +851,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -833,7 +865,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); @@ -866,6 +898,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -877,7 +910,7 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, 
memAccessFlags, byte_enable); @@ -894,6 +927,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -920,12 +954,12 @@ Fault memcpy(Vd, old_Vd, microVl * elem_size); // treat vta as vtu // if (machInst.vtype8.vta == 0) - memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size); } else { - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); } - size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -939,13 +973,13 @@ Fault def template VsStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -983,7 +1017,7 @@ private: RegId destRegIdxArr[0]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, _regIdx, _microIdx, _microVl) { @@ -1025,6 +1059,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; @@ -1038,7 +1073,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = 
this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1074,11 +1109,13 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_store = machInst.vm || elem_mask(v0, ei); if (need_store) { const std::vector byte_enable(mem_size, need_store); @@ -1105,8 +1142,8 @@ Fault def template VlIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1115,7 +1152,8 @@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1212,6 +1250,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vd[0]); RiscvISA::vreg_t tmp_v0; @@ -1223,8 +1262,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, 
byte_enable); @@ -1259,6 +1297,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -1270,7 +1309,8 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -1293,10 +1333,11 @@ Fault using vu = std::make_unsigned_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); - RiscvISA::vreg_t old_vd; + RiscvISA::vreg_t old_vd; decltype(Vd) old_Vd = nullptr; // We treat agnostic as undisturbed xc->getRegOperand(this, 2, &old_vd); @@ -1309,9 +1350,9 @@ Fault v0 = tmp_v0.as(); } - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -1326,8 +1367,8 @@ Fault def template VsIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1336,7 +1377,8 @@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; - const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t
micro_vlmax = vlenb / std::max(vs3_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1426,6 +1468,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1438,7 +1481,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1469,6 +1512,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1481,7 +1525,7 @@ Fault constexpr uint8_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1504,6 +1548,10 @@ Fault }}; +def template VMemBaseDecodeBlock {{ + return new %(class_name)s(machInst, vlen); +}}; + def template VMemTemplateDecodeBlock {{ switch(machInst.vtype8.vsew) { @@ -1523,3 +1571,23 @@ switch(machInst.vtype8.vsew) { } }}; + +def template VMemSplitTemplateDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s(machInst, vlen); + } + case 0b001: { + return new %(class_name)s(machInst, vlen); + } + case 0b010: { + return new %(class_name)s(machInst, vlen); + } + case 0b011: { + return new %(class_name)s(machInst, vlen); + } + default: GEM5_UNREACHABLE; +} + +}}; diff --git 
a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh index 03a7fc415f..91fb507034 100644 --- a/src/arch/riscv/pcstate.hh +++ b/src/arch/riscv/pcstate.hh @@ -62,7 +62,7 @@ class PCState : public GenericISA::UPCState<4> bool _compressed = false; RiscvType _rvType = RV64; - uint64_t _vlenb = 256; + uint64_t _vlenb = 32; VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial; uint32_t _vl = 0; diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index 01c600d148..c7edffc2f7 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -42,7 +42,6 @@ #ifndef __ARCH_RISCV_TYPES_HH__ #define __ARCH_RISCV_TYPES_HH__ -#include "arch/riscv/pcstate.hh" #include "base/bitunion.hh" namespace gem5 diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 40054aec0f..bac499e523 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -268,12 +268,13 @@ vtype_SEW(const uint64_t vtype) * Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h */ inline uint64_t -vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +vtype_VLMAX(const uint64_t vtype, const uint64_t vlen, + const bool per_reg = false) { int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); lmul = per_reg ? 
std::min(0, lmul) : lmul; int64_t vsew = bits(vtype, 5, 3); - return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); + return vlen >> (vsew + 3 - lmul); } inline int64_t From 52219e5e6ff227517e7d50c7eacddc3764ccb636 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Sat, 23 Sep 2023 21:42:08 +0200 Subject: [PATCH 5/7] arch-riscv: Add elen configuration to vector config instructions This patch adds elen as a member of vector configuration instructions so it can be used with speculative execution Change-Id: Iaf79015717a006374c5198aaa36e050edde40cee --- src/arch/riscv/decoder.cc | 1 + src/arch/riscv/decoder.hh | 1 + src/arch/riscv/insts/vector.hh | 7 +++++-- src/arch/riscv/isa/formats/vector_conf.isa | 23 +++++++++++++++++----- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index 3c199b3210..ee5d313587 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -43,6 +43,7 @@ Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst) { ISA *isa = dynamic_cast(p.isa); vlen = isa->getVecLenInBits(); + elen = isa->getVecElemLenInBits(); reset(); } diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index b53c48445d..bf863fda22 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -61,6 +61,7 @@ class Decoder : public InstDecoder uint32_t machInst; uint32_t vlen; + uint32_t elen; virtual StaticInstPtr decodeInst(ExtMachInst mach_inst); diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index 58a76e0ab1..c986c99c72 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -69,12 +69,15 @@ class VConfOp : public RiscvStaticInst uint64_t zimm10; uint64_t zimm11; uint64_t uimm; - VConfOp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + uint32_t elen; + VConfOp(const char *mnem, ExtMachInst _extMachInst, + uint32_t _elen, OpClass __opClass) + : RiscvStaticInst(mnem, _extMachInst, 
__opClass), bit30(_extMachInst.bit30), bit31(_extMachInst.bit31), zimm10(_extMachInst.zimm_vsetivli), zimm11(_extMachInst.zimm_vsetvli), - uimm(_extMachInst.uimm_vsetivli) + uimm(_extMachInst.uimm_vsetivli), + elen(_elen) { this->flags[IsVector] = true; } diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa index b997dbec97..6280e5679b 100644 --- a/src/arch/riscv/isa/formats/vector_conf.isa +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -42,8 +42,8 @@ def format VConfOp(code, write_code, declare_class, branch_class, *flags) {{ branchTargetTemplate = eval(branch_class) header_output = declareTemplate.subst(iop) - decoder_output = BasicConstructor.subst(iop) - decode_block = BasicDecode.subst(iop) + decoder_output = VConfConstructor.subst(iop) + decode_block = VConfDecodeBlock.subst(iop) exec_output = VConfExecute.subst(iop) + branchTargetTemplate.subst(iop) }}; @@ -61,7 +61,7 @@ def template VSetVlDeclare {{ public: /// Constructor. - %(class_name)s(ExtMachInst machInst); + %(class_name)s(ExtMachInst machInst, uint32_t elen); Fault execute(ExecContext *, trace::InstRecord *) const override; std::unique_ptr branchTarget( ThreadContext *tc) const override; @@ -86,7 +86,7 @@ def template VSetiVliDeclare {{ public: /// Constructor. 
- %(class_name)s(ExtMachInst machInst); + %(class_name)s(ExtMachInst machInst, uint32_t elen); Fault execute(ExecContext *, trace::InstRecord *) const override; std::unique_ptr branchTarget( const PCStateBase &branch_pc) const override; @@ -97,6 +97,19 @@ def template VSetiVliDeclare {{ }; }}; +def template VConfConstructor {{ +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen) + : %(base_class)s("%(mnemonic)s", _machInst, _elen, %(op_class)s) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template VConfDecodeBlock {{ + return new %(class_name)s(machInst,elen); +}}; + def template VConfExecute {{ VTYPE %(class_name)s::getNewVtype( @@ -112,7 +125,7 @@ def template VConfExecute {{ uint32_t newVill = !(vflmul >= 0.125 && vflmul <= 8) || - sew > std::min(vflmul, 1.0f) * ELEN || + sew > std::min(vflmul, 1.0f) * elen || bits(reqVtype, 62, 8) != 0; if (newVill) { newVtype = 0; From bfb295ac3f047301d5557a75c7b93cdb7179cd2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Armejach?= Date: Wed, 4 Oct 2023 14:49:01 +0200 Subject: [PATCH 6/7] util: cpt_upgrader fix vregs size for #PR171 * Make cpt_upgrader set vregs of size MaxVecLenInBytes Change-Id: Ie7e00d9bf42b705a0fb30c9d203933fc2e9bdcd9 --- util/cpt_upgraders/riscv-dyn-vlen.py | 49 ++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 util/cpt_upgraders/riscv-dyn-vlen.py diff --git a/util/cpt_upgraders/riscv-dyn-vlen.py b/util/cpt_upgraders/riscv-dyn-vlen.py new file mode 100644 index 0000000000..ea2de9d19d --- /dev/null +++ b/util/cpt_upgraders/riscv-dyn-vlen.py @@ -0,0 +1,49 @@ +# Copyright (c) 2023 Barcelona Supercomputing Center (BSC) +# All rights reserved. 
+ +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +def upgrader(cpt): + """ + Update the checkpoint to support the initial RVV implementation. + The updater takes the following steps. + + Set vector registers to occupy 327680 bytes (40regs * 8192bytes). + Vector registers now occupy this space regardless of VLEN as the + VecRegContainer is always MaxVecLenInBytes. 
+ """ + + for sec in cpt.sections(): + import re + + # Search for all XC sections + + if re.search(r".*processor.*\.core.*\.xc.*", sec): + # Updating RVV vector registers (dummy values) + mr = cpt.get(sec, "regs.vector").split() + if len(mr) != 327680: + cpt.set( + sec, "regs.vector", " ".join("0" for i in range(327680)) + ) From edf1d692572ba682457806c38be54dcbbe43eed7 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Thu, 12 Oct 2023 20:02:41 +0200 Subject: [PATCH 7/7] arch-riscv: Define vlwhole/vswhole mem acceses using vlen. This patch fixes the size of the memory acceses in vswhole and vlwhole instructions to the maximum vector length. Change-Id: Ib86b5356d9f1dfa277cb4b367893e3b08242f93e --- src/arch/generic/memhelpers.hh | 81 +++++++++++++++++++++ src/arch/riscv/isa/templates/vector_mem.isa | 10 ++- 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/src/arch/generic/memhelpers.hh b/src/arch/generic/memhelpers.hh index d5684a6af9..9cdd2a56eb 100644 --- a/src/arch/generic/memhelpers.hh +++ b/src/arch/generic/memhelpers.hh @@ -124,6 +124,24 @@ readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, return fault; } +/// Read from memory in atomic mode. 
+template +Fault +readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, + size_t size, Request::Flags flags) +{ + memset(&mem, 0, size); + static const std::vector byte_enable(size, true); + Fault fault = readMemAtomic(xc, addr, (uint8_t*)&mem, + size, flags, byte_enable); + if (fault == NoFault) { + mem = gtoh(mem, Order); + if (traceData) + traceData->setData(mem); + } + return fault; +} + template Fault readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, @@ -133,6 +151,16 @@ readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, xc, traceData, addr, mem, flags); } +template +Fault +readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, + size_t size, Request::Flags flags) +{ + return readMemAtomic( + xc, traceData, addr, mem, size, flags); +} + + template Fault readMemAtomicBE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, @@ -165,6 +193,20 @@ writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, sizeof(MemT), flags, res, byte_enable); } +template +Fault +writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, + size_t size, Request::Flags flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(mem); + } + mem = htog(mem, Order); + static const std::vector byte_enable(size, true); + return writeMemTiming(xc, (uint8_t*)&mem, addr, + size, flags, res, byte_enable); +} + template Fault writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, @@ -174,6 +216,15 @@ writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, xc, traceData, mem, addr, flags, res); } +template +Fault +writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, + size_t size, Request::Flags flags, uint64_t *res) +{ + return writeMemTiming( + xc, traceData, mem, addr, size, flags, res); +} + template Fault writeMemTimingBE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, @@ -214,6 
+265,27 @@ writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem, return fault; } +template +Fault +writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem, + Addr addr, size_t size, Request::Flags flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(mem); + } + MemT host_mem = htog(mem, Order); + static const std::vector byte_enable(size, true); + Fault fault = writeMemAtomic(xc, (uint8_t*)&host_mem, + addr, size, flags, res, byte_enable); + if (fault == NoFault && res != NULL) { + if (flags & Request::MEM_SWAP || flags & Request::MEM_SWAP_COND) + *(MemT *)res = gtoh(*(MemT *)res, Order); + else + *res = gtoh(*res, Order); + } + return fault; +} + template Fault writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem, @@ -223,6 +295,15 @@ writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem, xc, traceData, mem, addr, flags, res); } +template +Fault +writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem, + size_t size, Addr addr, Request::Flags flags, uint64_t *res) +{ + return writeMemAtomic( + xc, traceData, mem, addr, size, flags, res); +} + template Fault writeMemAtomicBE(XC *xc, trace::InstRecord *traceData, const MemT &mem, diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index fc1b93548c..8cbab044ec 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -562,7 +562,7 @@ Fault } Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem), - EA, memAccessFlags, nullptr); + vlenb, EA, memAccessFlags, nullptr); return fault; } @@ -592,7 +592,7 @@ Fault } Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem), - EA, memAccessFlags, nullptr); + EA, vlenb, memAccessFlags, nullptr); return fault; } @@ -689,7 +689,8 @@ Fault %(ea_code)s; Fault fault = readMemAtomicLE(xc, traceData, EA, - *(vreg_t::Container*)(&Mem), memAccessFlags); + 
*(vreg_t::Container*)(&Mem), vlenb, + memAccessFlags); if (fault != NoFault) return fault; @@ -722,7 +723,8 @@ Fault %(set_vlenb)s; %(ea_code)s; - Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + const std::vector byte_enable(vlenb, true); + Fault fault = initiateMemRead(xc, EA, vlenb, memAccessFlags, byte_enable); return fault; }