From 2c9fca7b607dc5bfcc0dcf47141eeafa1f1aadc4 Mon Sep 17 00:00:00 2001 From: Alvaro Moreno Date: Wed, 9 Aug 2023 12:39:03 +0200 Subject: [PATCH] arch-riscv: Add vlen configuration to vector instructions First, vlen is added as a member of Vector Macro Instructions where it is needed to split the instruction into Micro Instructions. Then, new PCState methods are used to get dynamic vlen and vlenb values at execution. Finally, vector length data types are fixed to 32 bits so every vlen value is considered. Change-Id: I5b8ceb0d291f456a30a4b0ae2f58601231d33a7a --- src/arch/riscv/insts/vector.cc | 12 +- src/arch/riscv/insts/vector.hh | 128 ++++---- src/arch/riscv/isa.cc | 3 + src/arch/riscv/isa/decoder.isa | 39 ++- src/arch/riscv/isa/formats/vector_arith.isa | 94 +++++- src/arch/riscv/isa/formats/vector_conf.isa | 3 +- src/arch/riscv/isa/formats/vector_mem.isa | 39 ++- src/arch/riscv/isa/templates/vector_arith.isa | 301 +++++++++++------- src/arch/riscv/isa/templates/vector_mem.isa | 218 ++++++++----- src/arch/riscv/pcstate.hh | 2 +- src/arch/riscv/types.hh | 1 - src/arch/riscv/utility.hh | 5 +- 12 files changed, 567 insertions(+), 278 deletions(-) diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 6ecec44dc5..c99e806e9b 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -215,8 +215,9 @@ std::string VleMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " << registerName(srcRegIdx(1)); if (!machInst.vm) ss << ", v0.t"; return ss.str(); } @@ -226,8 +227,9 @@ std::string VlWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << 
mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } @@ -235,8 +237,9 @@ std::string VseMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; if (!machInst.vm) ss << ", v0.t"; return ss.str(); } @@ -245,8 +248,9 @@ std::string VsWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index b25a6e3a09..58a76e0ab1 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -32,6 +32,7 @@ #include #include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/isa.hh" #include "arch/riscv/regs/misc.hh" #include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" @@ -116,11 +117,14 @@ class VectorMacroInst : public RiscvMacroInst protected: uint32_t vl; uint8_t vtype; + uint32_t vlen; + VectorMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) + OpClass __opClass, uint32_t _vlen = 256) : RiscvMacroInst(mnem, _machInst, __opClass), vl(_machInst.vl), - vtype(_machInst.vtype8) + vtype(_machInst.vtype8), + vlen(_vlen) { this->flags[IsVector] = true; } @@ -128,13 +132,15 @@ class VectorMacroInst : public RiscvMacroInst class VectorMicroInst : public RiscvMicroInst { - protected: - uint8_t microVl; +protected: + uint32_t vlen; + 
uint32_t microVl; uint8_t microIdx; uint8_t vtype; VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen = 256) : RiscvMicroInst(mnem, _machInst, __opClass), + vlen(_vlen), microVl(_microVl), microIdx(_microIdx), vtype(_machInst.vtype8) @@ -169,7 +175,7 @@ class VectorArithMicroInst : public VectorMicroInst { protected: VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -182,12 +188,11 @@ class VectorArithMacroInst : public VectorMacroInst { protected: VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } - std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; }; @@ -196,7 +201,7 @@ class VectorVMUNARY0MicroInst : public VectorMicroInst { protected: VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -209,8 +214,8 @@ class VectorVMUNARY0MacroInst : public VectorMacroInst { protected: VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -223,8 +228,8 @@ class VectorSlideMacroInst : public VectorMacroInst { protected: VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, 
__opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -239,7 +244,7 @@ class VectorSlideMicroInst : public VectorMicroInst uint8_t vdIdx; uint8_t vs2Idx; VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , vdIdx(_vdIdx), vs2Idx(_vs2Idx) @@ -256,7 +261,7 @@ class VectorMemMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint32_t _offset) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , offset(_offset) @@ -268,8 +273,8 @@ class VectorMemMacroInst : public VectorMacroInst { protected: VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) {} }; @@ -277,8 +282,8 @@ class VleMacroInst : public VectorMemMacroInst { protected: VleMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -289,8 +294,8 @@ class VseMacroInst : public VectorMemMacroInst { protected: VseMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -302,9 +307,10 @@ class VleMicroInst : public VectorMicroInst protected: 
Request::Flags memAccessFlags; - VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + VleMicroInst(const char *mnem, ExtMachInst _machInst,OpClass __opClass, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsLoad] = true; } @@ -319,8 +325,9 @@ class VseMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsStore] = true; } @@ -333,8 +340,8 @@ class VlWholeMacroInst : public VectorMemMacroInst { protected: VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -347,8 +354,10 @@ class VlWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, + uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -359,8 +368,8 @@ class VsWholeMacroInst : public VectorMemMacroInst { protected: VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, 
_machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -373,8 +382,10 @@ class VsWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx) + OpClass __opClass, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass , _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -385,8 +396,8 @@ class VlStrideMacroInst : public VectorMemMacroInst { protected: VlStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -399,7 +410,7 @@ class VlStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -413,8 +424,8 @@ class VsStrideMacroInst : public VectorMemMacroInst { protected: VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -427,7 +438,7 @@ class VsStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -441,8 +452,8 @@ class VlIndexMacroInst : public 
VectorMemMacroInst { protected: VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -473,8 +484,8 @@ class VsIndexMacroInst : public VectorMemMacroInst { protected: VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -516,7 +527,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst { protected: VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -533,10 +544,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst RegId destRegIdxArr[1]; public: - VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, - uint8_t _numSrcs) + uint32_t vlen; + VMaskMergeMicroInst(ExtMachInst extMachInst, + uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen) : VectorArithMicroInst("vmask_mv_micro", extMachInst, - VectorIntegerArithOp, 0, 0) + VectorIntegerArithOp, 0, 0), + vlen(_vlen) { setRegIdxArrays( reinterpret_cast( @@ -558,26 +571,28 @@ class VMaskMergeMicroInst : public VectorArithMicroInst execute(ExecContext* xc, trace::InstRecord* traceData) const override { vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + PCStateBase *pc_ptr = xc->tcBase()->pcState().clone(); auto Vd = tmp_d0.as(); - constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + uint32_t vlenb = pc_ptr->as().vlenb(); + const uint32_t elems_per_vreg = vlenb / sizeof(ElemType); size_t bit_cnt = elems_per_vreg; vreg_t tmp_s; xc->getRegOperand(this, 0, &tmp_s); auto s = tmp_s.as(); // 
cp the first result and tail - memcpy(Vd, s, VLENB); + memcpy(Vd, s, vlenb); for (uint8_t i = 1; i < this->_numSrcRegs; i++) { xc->getRegOperand(this, i, &tmp_s); s = tmp_s.as(); - if constexpr (elems_per_vreg < 8) { - constexpr uint8_t m = (1 << elems_per_vreg) - 1; - const uint8_t mask = m << (i * elems_per_vreg % 8); + if (elems_per_vreg < 8) { + const uint32_t m = (1 << elems_per_vreg) - 1; + const uint32_t mask = m << (i * elems_per_vreg % 8); // clr & ext bits Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; bit_cnt += elems_per_vreg; } else { - constexpr uint8_t byte_offset = elems_per_vreg / 8; + const uint32_t byte_offset = elems_per_vreg / 8; memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); } } @@ -595,7 +610,8 @@ class VMaskMergeMicroInst : public VectorArithMicroInst for (uint8_t i = 0; i < this->_numSrcRegs; i++) { ss << ", " << registerName(srcRegIdx(i)); } - ss << ", offset:" << VLENB / sizeof(ElemType); + unsigned vlenb = vlen >> 3; + ss << ", offset:" << vlenb / sizeof(ElemType); return ss.str(); } }; diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 5785a14e92..877b795551 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -271,6 +271,9 @@ ISA::ISA(const Params &p) :BaseISA(p), "VLEN should be greater or equal", "than ELEN. Ch. 
2RISC-V vector spec."); + inform("RVV enabled, VLEN = %d bits, ELEN = %d bits", + p.vlen, p.elen); + miscRegFile.resize(NUM_MISCREGS); clear(); diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2bd3d33a7e..3d1d396165 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -3110,21 +3110,33 @@ decode QUADRANT default Unknown::unknown() { 0x12: decode VS1 { format VectorIntExtFormat { 0x02: vzext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x03: vsext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x04: vzext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x05: vsext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x06: vzext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x07: vsext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); } @@ -3185,7 +3197,8 @@ decode QUADRANT default Unknown::unknown() { auto Vs2bit = tmp_s2.as(); for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i + - vtype_VLMAX(vtype, true) * this->microIdx; + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; bool vs2_lsb = elem_mask(Vs2bit, ei); bool do_mask = elem_mask(v0, ei); bool has_one = false; @@ -3406,7 +3419,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0e: VectorSlideUpFormat::vslideup_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if 
(std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3431,7 +3445,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3662,7 +3677,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslideup_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3687,7 +3703,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3964,7 +3981,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3994,7 +4012,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPFVF, VectorMiscOp); 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ const int offset = 1; - const int microVlmax = 
vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -4239,7 +4258,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslide1up_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -4269,7 +4289,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslide1down_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa index c462e6c8d4..0d5055ea8f 100644 --- a/src/arch/riscv/isa/formats/vector_arith.isa +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -28,6 +28,10 @@ let {{ + def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" + def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" def setDestWrapper(destRegId): return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \ "_numTypedDestRegs[VecRegClass]++;\n" @@ -67,7 +71,7 @@ let {{ ''' + code else: return ''' - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx; ''' + code def wideningOpRegisterConstraintChecks(code): @@ -178,12 +182,15 @@ def format VectorIntFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + 
set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -225,12 +232,17 @@ def format VectorIntExtFormat(code, category, *flags) {{ code = loopWrapper(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'ext_div': ext_div}, @@ -293,12 +305,17 @@ def format VectorIntWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -348,12 +365,17 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ code = narrowingOpRegisterConstraintChecks(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), }, @@ -416,12 +438,15 @@ def format VectorIntMaskFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", 
'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -474,12 +499,17 @@ def format VectorGatherFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'idx_type': idx_type}, @@ -537,12 +567,15 @@ def format VectorFloatFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -581,12 +614,15 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -649,12 +685,17 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -693,12 +734,17 
@@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -738,12 +784,17 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -783,6 +834,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() code = maskCondWrapper(code) code = eiDeclarePrefix(code) @@ -795,6 +847,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -816,7 +869,8 @@ def format VMvWholeFormat(code, category, *flags) {{ microiop = InstObjParams(name + "_micro", Name + "Micro", 'VMvWholeMicroInst', - {'code': code}, + {'code': code, + 'set_vlen': setVlen()}, flags) header_output = \ @@ -847,6 +901,7 @@ def format ViotaFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() microiop = InstObjParams(name+"_micro", Name+"Micro", @@ -854,6 +909,7 @@ def format ViotaFormat(code, category, 
*flags){{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -885,12 +941,14 @@ def format Vector1Vs1VdMaskFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() iop = InstObjParams(name, Name, 'VectorNonSplitInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -946,10 +1004,10 @@ def format VectorNonSplitFormat(code, category, *flags) {{ if inst_name == "vfmv" : execute_block = VectorFloatNonSplitExecute.subst(iop) - decode_block = VectorFloatDecodeBlock.subst(iop) + decode_block = VectorFloatNonSplitDecodeBlock.subst(iop) elif inst_name == "vmv" : execute_block = VectorIntNonSplitExecute.subst(iop) - decode_block = VectorIntDecodeBlock.subst(iop) + decode_block = VectorIntNonSplitDecodeBlock.subst(iop) else : error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) @@ -984,6 +1042,8 @@ def format VectorMaskFormat(code, category, *flags) {{ set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_vlenb = setVlenb() + code = loopWrapper(code, micro_inst = False) iop = InstObjParams(name, @@ -992,6 +1052,7 @@ def format VectorMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) # Because of the use of templates, we had to put all parts in header to @@ -1020,6 +1081,9 @@ def format VectorReduceIntFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using vu 
[[maybe_unused]] = std::make_unsigned_t; using vi [[maybe_unused]] = std::make_signed_t; @@ -1030,6 +1094,8 @@ def format VectorReduceIntFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, + 'set_vlen' : set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1062,6 +1128,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using et = ElemType; using vu = decltype(et::v); @@ -1075,6 +1144,8 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1107,6 +1178,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() type_def = ''' using et = ElemType; using vu [[maybe_unused]] = decltype(et::v); @@ -1119,6 +1192,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1162,6 +1237,8 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + code = maskCondWrapper(code) code = eiDeclarePrefix(code) code = loopWrapper(code) @@ -1172,6 +1249,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ {'code': code, 
'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -1204,12 +1282,16 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -1261,12 +1343,16 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_src_reg_idx += setSrcVm() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa index 457c5ce40d..b997dbec97 100644 --- a/src/arch/riscv/isa/formats/vector_conf.isa +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -157,7 +157,8 @@ def template VConfExecute {{ tc->setMiscReg(MISCREG_VSTART, 0); - VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, vlen); + VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, + vlen); vlmax = new_vtype.vill ? 
0 : getVlmax(new_vtype, vlen); uint32_t new_vl = getNewVL( current_vl, requested_vl, vlmax, rd_bits, rs1_bits); diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa index 113250d5cf..da53d80d0a 100644 --- a/src/arch/riscv/isa/formats/vector_mem.isa +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -29,10 +29,15 @@ let {{ +def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" +def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" + def VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, base_class, postacc_code='', declare_template_base=VMemMacroDeclare, - decode_template=BasicDecode, exec_template_base='', + decode_template=VMemBaseDecodeBlock, exec_template_base='', # If it's a macroop, the corresponding microops will be # generated. is_macroop=True): @@ -63,7 +68,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, exec_template_base + 'MicroInst', {'ea_code': ea_code, 'memacc_code': memacc_code, - 'postacc_code': postacc_code}, + 'postacc_code': postacc_code, + 'set_vlenb': setVlenb(), + 'set_vlen': setVlen()}, inst_flags) if mem_flags: @@ -90,7 +97,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, def format VleOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -101,7 +110,9 @@ def format VleOp( def format VseOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -134,7 +145,9 @@ def format VsmOp( def format VlWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -145,7 +158,9 @@ def format VlWholeOp( def format VsWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -156,7 +171,9 @@ 
def format VsWholeOp( def format VlStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -167,7 +184,9 @@ def format VlStrideOp( def format VsStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -186,7 +205,7 @@ def format VlIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VlIndexMacroInst', exec_template_base='VlIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; @@ -200,6 +219,6 @@ def format VsIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VsIndexMacroInst', exec_template_base='VsIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 9b5ee0e7fa..306b1c53f1 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -35,8 +35,8 @@ output header {{ [[maybe_unused]] RiscvISA::vreg_t old_vd; \ [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ xc->getRegOperand(this, (idx), &old_vd); \ - old_Vd = old_vd.as >(); \ - memcpy(Vd, old_Vd, VLENB); + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, vlenb); #define VRM_REQUIRED \ uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ @@ -73,7 +73,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -82,14 +82,14 @@ public: def 
template VectorIntMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -121,7 +121,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -133,7 +133,7 @@ def template VectorIntMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -173,6 +173,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -190,7 +191,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -214,7 +215,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, @@ -254,13 +255,16 
@@ Fault xc->setMiscReg(MISCREG_STATUS, status); auto SEW = vtype_SEW(vtype); - auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + auto index = (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { case 8: { using vext [[maybe_unused]] = int8_t; using vextu [[maybe_unused]] = uint8_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -272,6 +276,8 @@ Fault using vextu [[maybe_unused]] = uint16_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -283,6 +289,8 @@ Fault using vextu [[maybe_unused]] = uint32_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -300,10 +308,10 @@ Fault def template VectorIntDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -316,7 +324,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -325,8 +333,8 @@ public: def template VectorIntWideningMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -337,7 +345,7 @@ template const uint32_t num_microops = 1 << std::max(0, vlmul + 1); 
int32_t tmp_vl = this->vl; - const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -370,7 +378,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -382,7 +390,7 @@ def template VectorIntWideningMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -415,6 +423,10 @@ Fault return std::make_shared( "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -423,13 +435,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -459,6 +469,11 @@ Fault "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -466,13 +481,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -485,9 +498,9 @@ Fault def template VectorIntWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -500,7 +513,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -508,14 +521,14 @@ public: def template VectorFloatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = 
vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -547,7 +560,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -557,7 +570,7 @@ public: def template VectorFloatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -598,6 +611,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -611,8 +625,8 @@ Fault def template VectorFloatDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -625,7 +639,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -650,7 +664,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override @@ -693,14 +707,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - 
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -737,14 +754,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -757,7 +777,7 @@ Fault def template VectorFloatWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -771,7 +791,7 @@ private: int cnt = 0; %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -781,14 +801,14 @@ public: def template ViotaMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); 
StaticInstPtr microop; @@ -819,7 +839,7 @@ private: bool vm; int* cnt; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, int* cnt); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -831,7 +851,7 @@ def template ViotaMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, int* cnt) + uint32_t _microVl, uint8_t _microIdx, int* cnt) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -871,6 +891,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -919,6 +940,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -997,7 +1019,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1006,14 +1028,14 @@ public: def template VectorIntMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1028,7 +1050,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), 
_vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1050,7 +1072,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1061,7 +1083,7 @@ def template VectorIntMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1099,10 +1121,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1119,7 +1142,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1128,14 +1151,14 @@ public: def template VectorFloatMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1150,7 +1173,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new 
VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), _vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1171,7 +1194,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1182,7 +1205,7 @@ def template VectorFloatMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1220,10 +1243,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1276,7 +1300,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1287,7 +1311,7 @@ public: def template VMvWholeMicroConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1321,7 +1345,8 @@ Fault %(op_decl)s; %(op_rd)s; - for (size_t i = 0; i < (VLEN / 64); i++) { + %(set_vlen)s; + for (size_t i = 0; i < (vlen / 64); i++) { %(code)s; } %(op_wb)s; @@ -1382,6 +1407,7 @@ Fault %(op_decl)s; %(op_rd)s; // TODO: remove it + %(set_vlenb)s; %(copy_old_vd)s; %(code)s; %(op_wb)s; @@ -1489,6 +1515,28 @@ Fault }}; +def 
template VectorFloatNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + def template VectorReduceMacroDeclare {{ template @@ -1496,7 +1544,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1505,14 +1553,14 @@ public: def template VectorReduceMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1544,7 +1592,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1555,7 +1603,7 @@ def template VectorReduceMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, 
uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1593,6 +1641,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1600,7 +1650,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { microop_result = f(microop_result, Vs2[i]); } @@ -1625,6 +1676,7 @@ Fault %(type_def)s; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -1638,6 +1690,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1647,7 +1701,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1685,6 +1740,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1694,7 +1751,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1716,7 +1774,7 @@ class %(class_name)s : public %(base_class)s{ private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + 
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1725,8 +1783,9 @@ public: def template VectorGatherMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1740,7 +1799,8 @@ template const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul; const uint8_t vd_vregs = vs2_vregs; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs1_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1778,7 +1838,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1789,7 +1849,7 @@ def template VectorGatherMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1839,17 +1899,19 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; - const uint32_t vlmax = vtype_VLMAX(vtype); + const uint32_t vlmax = vtype_VLMAX(vtype,vlen); constexpr uint8_t vd_eewb = sizeof(ElemType); constexpr uint8_t vs1_eewb = sizeof(IndexType); constexpr uint8_t vs2_eewb = sizeof(ElemType); constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) 
/ vd_eewb; - [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; - [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; - [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const uint16_t vd_elems = vlenb / vd_eewb; + [[maybe_unused]] const uint16_t vs1_elems = vlenb / vs1_eewb; + [[maybe_unused]] const uint16_t vs2_elems = vlenb / vs2_eewb; [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; @@ -1875,19 +1937,19 @@ def template VectorGatherDecodeBlock {{ switch(machInst.vtype8.vsew) { case 0b000: { using elem_type [[maybe_unused]] = uint8_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b001: { using elem_type [[maybe_unused]] = uint16_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b010: { using elem_type [[maybe_unused]] = uint32_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b011: { using elem_type [[maybe_unused]] = uint64_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } default: GEM5_UNREACHABLE; } @@ -1902,7 +1964,7 @@ private: %(reg_idx_arr_decl)s; bool vxsat = false; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1911,14 +1973,14 @@ public: def template VectorIntVxsatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const 
int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1954,7 +2016,7 @@ private: bool vm; bool* vxsatptr; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1966,7 +2028,7 @@ def template VectorIntVxsatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -2007,6 +2069,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -2016,7 +2080,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]); } @@ -2038,7 +2103,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -2047,14 +2112,14 @@ public: def template VectorSlideUpMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t 
num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2082,14 +2147,14 @@ template def template VectorSlideDownMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2126,7 +2191,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -2138,7 +2203,7 @@ def template VectorSlideMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vdIdx, _vs2Idx) { @@ -2174,10 +2239,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen); + 
%(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -2210,10 +2278,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen); + %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index 2b3b9187bf..fc1b93548c 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -34,6 +34,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -48,6 +49,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -55,16 +57,17 @@ public: def template VleConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; if (micro_vl == 0) { @@ -72,7 +75,7 @@ def template VleConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); 
microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -93,9 +96,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, - _microIdx) + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -127,12 +131,15 @@ Fault Addr EA; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; RiscvISA::vreg_t tmp_v0; uint8_t *v0; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -150,15 +157,18 @@ Fault } uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); if (fault != NoFault) return fault; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; %(memacc_code)s; @@ -176,10 +186,12 @@ Fault %(class_name)s::initiateAcc(ExecContext* xc, trace::InstRecord* traceData) const { + Addr EA; %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; MISA misa = xc->readMiscReg(MISCREG_ISA); @@ -192,6 +204,7 @@ Fault return std::make_shared("VILL is set", machInst); uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = initiateMemRead(xc, 
EA, mem_size, memAccessFlags, byte_enable); @@ -208,6 +221,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -222,8 +236,9 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; @@ -238,13 +253,13 @@ Fault def template VseConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); @@ -256,7 +271,7 @@ def template VseConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -277,9 +292,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[0]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", 
_machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -326,9 +342,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -375,9 +393,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -412,20 +432,20 @@ Fault def template VlmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop = new Vle8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); } @@ -439,20 +459,20 @@ def template VlmConstructor {{ def template VsmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; 
%(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new Vse8_vMicro(_machInst, micro_vl, 0); + microop = new Vse8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); } @@ -466,18 +486,18 @@ def template VsmConstructor {{ def template VsWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -497,9 +517,10 @@ private: RegId destRegIdxArr[0]; RegId srcRegIdxArr[2]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -532,9 +553,11 @@ Fault } %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } @@ -560,9 +583,11 @@ Fault } 
%(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } @@ -586,18 +611,19 @@ Fault def template VlWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -617,9 +643,10 @@ private: RegId destRegIdxArr[1]; RegId srcRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s_micro", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -657,6 +684,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; Fault fault = readMemAtomicLE(xc, traceData, EA, @@ -664,7 +693,7 @@ Fault if (fault != NoFault) return fault; - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; i++) { %(memacc_code)s; } @@ -690,6 +719,7 @@ Fault } %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); @@ -706,6 
+736,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -713,7 +744,7 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; ++i) { %(memacc_code)s; } @@ -726,13 +757,13 @@ Fault def template VlStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -770,7 +801,7 @@ private: RegId destRegIdxArr[1]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _regIdx, _microIdx, _microVl) { @@ -820,6 +851,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -833,7 +865,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); @@ -866,6 +898,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size 
@@ -877,7 +910,7 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -894,6 +927,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -920,12 +954,12 @@ Fault memcpy(Vd, old_Vd, microVl * elem_size); // treat vta as vtu // if (machInst.vtype8.vta == 0) - memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size); } else { - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); } - size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -939,13 +973,13 @@ Fault def template VsStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -983,7 +1017,7 @@ private: RegId destRegIdxArr[0]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, 
_regIdx, _microIdx, _microVl) { @@ -1025,6 +1059,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; @@ -1038,7 +1073,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1074,11 +1109,13 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_store = machInst.vm || elem_mask(v0, ei); if (need_store) { const std::vector byte_enable(mem_size, need_store); @@ -1105,8 +1142,8 @@ Fault def template VlIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1115,7 +1152,8 @@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1212,6 +1250,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vd[0]); RiscvISA::vreg_t tmp_v0; @@ -1223,8 +1262,7 @@ 
Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); @@ -1259,6 +1297,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -1270,7 +1309,8 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -1293,10 +1333,11 @@ Fault using vu = std::make_unsigned_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); - RiscvISA::vreg_t old_vd; + RiscvISA::vreg_t old_vd; decltype(Vd) old_Vd = nullptr; // We treat agnostic as undistrubed xc->getRegOperand(this, 2, &old_vd); @@ -1309,9 +1350,9 @@ Fault v0 = tmp_v0.as(); } - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -1326,8 +1367,8 @@ Fault def template VsIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1336,7 +1377,8 
@@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; - const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vs3_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1426,6 +1468,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1438,7 +1481,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1469,6 +1512,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1481,7 +1525,7 @@ Fault constexpr uint8_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1504,6 +1548,10 @@ Fault }}; +def template VMemBaseDecodeBlock {{ + return new %(class_name)s(machInst, vlen); +}}; + def template VMemTemplateDecodeBlock {{ switch(machInst.vtype8.vsew) { @@ -1523,3 +1571,23 @@ switch(machInst.vtype8.vsew) { } }}; + +def template VMemSplitTemplateDecodeBlock {{ + 
+switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s(machInst, vlen); + } + case 0b001: { + return new %(class_name)s(machInst, vlen); + } + case 0b010: { + return new %(class_name)s(machInst, vlen); + } + case 0b011: { + return new %(class_name)s(machInst, vlen); + } + default: GEM5_UNREACHABLE; +} + +}}; diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh index 03a7fc415f..91fb507034 100644 --- a/src/arch/riscv/pcstate.hh +++ b/src/arch/riscv/pcstate.hh @@ -62,7 +62,7 @@ class PCState : public GenericISA::UPCState<4> bool _compressed = false; RiscvType _rvType = RV64; - uint64_t _vlenb = 256; + uint64_t _vlenb = 32; VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial; uint32_t _vl = 0; diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index 01c600d148..c7edffc2f7 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -42,7 +42,6 @@ #ifndef __ARCH_RISCV_TYPES_HH__ #define __ARCH_RISCV_TYPES_HH__ -#include "arch/riscv/pcstate.hh" #include "base/bitunion.hh" namespace gem5 diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 40054aec0f..bac499e523 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -268,12 +268,13 @@ vtype_SEW(const uint64_t vtype) * Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h */ inline uint64_t -vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +vtype_VLMAX(const uint64_t vtype, const uint64_t vlen, + const bool per_reg = false) { int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); lmul = per_reg ? std::min(0, lmul) : lmul; int64_t vsew = bits(vtype, 5, 3); - return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); + return vlen >> (vsew + 3 - lmul); } inline int64_t