diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 6ecec44dc5..c99e806e9b 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -215,8 +215,9 @@ std::string VleMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " << registerName(srcRegIdx(1)); if (!machInst.vm) ss << ", v0.t"; return ss.str(); @@ -226,8 +227,9 @@ std::string VlWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } @@ -235,8 +237,9 @@ std::string VseMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; if (!machInst.vm) ss << ", v0.t"; return ss.str(); } @@ -245,8 +248,9 @@ std::string VsWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index b25a6e3a09..58a76e0ab1 100644 --- a/src/arch/riscv/insts/vector.hh +++ 
b/src/arch/riscv/insts/vector.hh @@ -32,6 +32,7 @@ #include #include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/isa.hh" #include "arch/riscv/regs/misc.hh" #include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" @@ -116,11 +117,14 @@ class VectorMacroInst : public RiscvMacroInst protected: uint32_t vl; uint8_t vtype; + uint32_t vlen; + VectorMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) + OpClass __opClass, uint32_t _vlen = 256) : RiscvMacroInst(mnem, _machInst, __opClass), vl(_machInst.vl), - vtype(_machInst.vtype8) + vtype(_machInst.vtype8), + vlen(_vlen) { this->flags[IsVector] = true; } @@ -128,13 +132,15 @@ class VectorMacroInst : public RiscvMacroInst class VectorMicroInst : public RiscvMicroInst { - protected: - uint8_t microVl; +protected: + uint32_t vlen; + uint32_t microVl; uint8_t microIdx; uint8_t vtype; VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen = 256) : RiscvMicroInst(mnem, _machInst, __opClass), + vlen(_vlen), microVl(_microVl), microIdx(_microIdx), vtype(_machInst.vtype8) @@ -169,7 +175,7 @@ class VectorArithMicroInst : public VectorMicroInst { protected: VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -182,12 +188,11 @@ class VectorArithMacroInst : public VectorMacroInst { protected: VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } - std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; }; @@ -196,7 +201,7 @@ class VectorVMUNARY0MicroInst : 
public VectorMicroInst { protected: VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -209,8 +214,8 @@ class VectorVMUNARY0MacroInst : public VectorMacroInst { protected: VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -223,8 +228,8 @@ class VectorSlideMacroInst : public VectorMacroInst { protected: VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -239,7 +244,7 @@ class VectorSlideMicroInst : public VectorMicroInst uint8_t vdIdx; uint8_t vs2Idx; VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , vdIdx(_vdIdx), vs2Idx(_vs2Idx) @@ -256,7 +261,7 @@ class VectorMemMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint32_t _offset) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , offset(_offset) @@ -268,8 +273,8 @@ class VectorMemMacroInst : public VectorMacroInst { protected: VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + 
: VectorMacroInst(mnem, _machInst, __opClass, _vlen) {} }; @@ -277,8 +282,8 @@ class VleMacroInst : public VectorMemMacroInst { protected: VleMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -289,8 +294,8 @@ class VseMacroInst : public VectorMemMacroInst { protected: VseMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -302,9 +307,10 @@ class VleMicroInst : public VectorMicroInst protected: Request::Flags memAccessFlags; - VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + VleMicroInst(const char *mnem, ExtMachInst _machInst,OpClass __opClass, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsLoad] = true; } @@ -319,8 +325,9 @@ class VseMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsStore] = true; } @@ -333,8 +340,8 @@ class VlWholeMacroInst : public VectorMemMacroInst { protected: VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, 
__opClass, _vlen) {} std::string generateDisassembly( @@ -347,8 +354,10 @@ class VlWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, + uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -359,8 +368,8 @@ class VsWholeMacroInst : public VectorMemMacroInst { protected: VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -373,8 +382,10 @@ class VsWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx) + OpClass __opClass, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass , _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -385,8 +396,8 @@ class VlStrideMacroInst : public VectorMemMacroInst { protected: VlStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -399,7 +410,7 @@ class VlStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, 
__opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -413,8 +424,8 @@ class VsStrideMacroInst : public VectorMemMacroInst { protected: VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -427,7 +438,7 @@ class VsStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -441,8 +452,8 @@ class VlIndexMacroInst : public VectorMemMacroInst { protected: VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -473,8 +484,8 @@ class VsIndexMacroInst : public VectorMemMacroInst { protected: VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -516,7 +527,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst { protected: VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -533,10 +544,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst RegId destRegIdxArr[1]; public: - VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, - uint8_t _numSrcs) + uint32_t vlen; + 
VMaskMergeMicroInst(ExtMachInst extMachInst, + uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen) : VectorArithMicroInst("vmask_mv_micro", extMachInst, - VectorIntegerArithOp, 0, 0) + VectorIntegerArithOp, 0, 0), + vlen(_vlen) { setRegIdxArrays( reinterpret_cast( @@ -558,26 +571,28 @@ class VMaskMergeMicroInst : public VectorArithMicroInst execute(ExecContext* xc, trace::InstRecord* traceData) const override { vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + PCStateBase *pc_ptr = xc->tcBase()->pcState().clone(); auto Vd = tmp_d0.as(); - constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + uint32_t vlenb = pc_ptr->as().vlenb(); + const uint32_t elems_per_vreg = vlenb / sizeof(ElemType); size_t bit_cnt = elems_per_vreg; vreg_t tmp_s; xc->getRegOperand(this, 0, &tmp_s); auto s = tmp_s.as(); // cp the first result and tail - memcpy(Vd, s, VLENB); + memcpy(Vd, s, vlenb); for (uint8_t i = 1; i < this->_numSrcRegs; i++) { xc->getRegOperand(this, i, &tmp_s); s = tmp_s.as(); - if constexpr (elems_per_vreg < 8) { - constexpr uint8_t m = (1 << elems_per_vreg) - 1; - const uint8_t mask = m << (i * elems_per_vreg % 8); + if (elems_per_vreg < 8) { + const uint32_t m = (1 << elems_per_vreg) - 1; + const uint32_t mask = m << (i * elems_per_vreg % 8); // clr & ext bits Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; bit_cnt += elems_per_vreg; } else { - constexpr uint8_t byte_offset = elems_per_vreg / 8; + const uint32_t byte_offset = elems_per_vreg / 8; memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); } } @@ -595,7 +610,8 @@ class VMaskMergeMicroInst : public VectorArithMicroInst for (uint8_t i = 0; i < this->_numSrcRegs; i++) { ss << ", " << registerName(srcRegIdx(i)); } - ss << ", offset:" << VLENB / sizeof(ElemType); + unsigned vlenb = vlen >> 3; + ss << ", offset:" << vlenb / sizeof(ElemType); return ss.str(); } }; diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 5785a14e92..877b795551 
100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -271,6 +271,9 @@ ISA::ISA(const Params &p) :BaseISA(p), "VLEN should be greater or equal", "than ELEN. Ch. 2RISC-V vector spec."); + inform("RVV enabled, VLEN = %d bits, ELEN = %d bits", + p.vlen, p.elen); + miscRegFile.resize(NUM_MISCREGS); clear(); diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2bd3d33a7e..3d1d396165 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -3110,21 +3110,33 @@ decode QUADRANT default Unknown::unknown() { 0x12: decode VS1 { format VectorIntExtFormat { 0x02: vzext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x03: vsext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x04: vzext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x05: vsext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x06: vzext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x07: vsext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); } @@ -3185,7 +3197,8 @@ decode QUADRANT default Unknown::unknown() { auto Vs2bit = tmp_s2.as(); for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i + - vtype_VLMAX(vtype, true) * this->microIdx; + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; bool vs2_lsb = elem_mask(Vs2bit, ei); bool do_mask = elem_mask(v0, ei); bool has_one = false; @@ -3406,7 +3419,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0e: VectorSlideUpFormat::vslideup_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + 
const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3431,7 +3445,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3662,7 +3677,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslideup_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3687,7 +3703,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3964,7 +3981,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3994,7 +4012,8 @@ decode 
QUADRANT default Unknown::unknown() { }}, OPFVF, VectorMiscOp); 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -4239,7 +4258,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslide1up_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -4269,7 +4289,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslide1down_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa index c462e6c8d4..0d5055ea8f 100644 --- a/src/arch/riscv/isa/formats/vector_arith.isa +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -28,6 +28,10 @@ let {{ + def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" + def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" def setDestWrapper(destRegId): return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \ "_numTypedDestRegs[VecRegClass]++;\n" @@ -67,7 +71,7 @@ let {{ ''' + code else: return ''' - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx; ''' + code def 
wideningOpRegisterConstraintChecks(code): @@ -178,12 +182,15 @@ def format VectorIntFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -225,12 +232,17 @@ def format VectorIntExtFormat(code, category, *flags) {{ code = loopWrapper(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'ext_div': ext_div}, @@ -293,12 +305,17 @@ def format VectorIntWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -348,12 +365,17 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ code = narrowingOpRegisterConstraintChecks(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), }, @@ -416,12 +438,15 @@ def format VectorIntMaskFormat(code, 
category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -474,12 +499,17 @@ def format VectorGatherFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'idx_type': idx_type}, @@ -537,12 +567,15 @@ def format VectorFloatFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -581,12 +614,15 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -649,12 +685,17 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 
'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -693,12 +734,17 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -738,12 +784,17 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -783,6 +834,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() code = maskCondWrapper(code) code = eiDeclarePrefix(code) @@ -795,6 +847,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -816,7 +869,8 @@ def format VMvWholeFormat(code, category, *flags) {{ microiop = InstObjParams(name + "_micro", Name + "Micro", 'VMvWholeMicroInst', - {'code': code}, + {'code': code, + 'set_vlen': setVlen()}, flags) header_output = \ @@ -847,6 +901,7 @@ def format ViotaFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = 
vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() microiop = InstObjParams(name+"_micro", Name+"Micro", @@ -854,6 +909,7 @@ def format ViotaFormat(code, category, *flags){{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -885,12 +941,14 @@ def format Vector1Vs1VdMaskFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() iop = InstObjParams(name, Name, 'VectorNonSplitInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -946,10 +1004,10 @@ def format VectorNonSplitFormat(code, category, *flags) {{ if inst_name == "vfmv" : execute_block = VectorFloatNonSplitExecute.subst(iop) - decode_block = VectorFloatDecodeBlock.subst(iop) + decode_block = VectorFloatNonSplitDecodeBlock.subst(iop) elif inst_name == "vmv" : execute_block = VectorIntNonSplitExecute.subst(iop) - decode_block = VectorIntDecodeBlock.subst(iop) + decode_block = VectorIntNonSplitDecodeBlock.subst(iop) else : error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) @@ -984,6 +1042,8 @@ def format VectorMaskFormat(code, category, *flags) {{ set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_vlenb = setVlenb() + code = loopWrapper(code, micro_inst = False) iop = InstObjParams(name, @@ -992,6 +1052,7 @@ def format VectorMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) # Because of the use of templates, we had to put all parts in header to @@ -1020,6 +1081,9 @@ def format VectorReduceIntFormat(code, category, *flags) {{ 
set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using vu [[maybe_unused]] = std::make_unsigned_t; using vi [[maybe_unused]] = std::make_signed_t; @@ -1030,6 +1094,8 @@ def format VectorReduceIntFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, + 'set_vlen' : set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1062,6 +1128,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using et = ElemType; using vu = decltype(et::v); @@ -1075,6 +1144,8 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1107,6 +1178,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() type_def = ''' using et = ElemType; using vu [[maybe_unused]] = decltype(et::v); @@ -1119,6 +1192,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1162,6 +1237,8 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = 
setVlenb() + code = maskCondWrapper(code) code = eiDeclarePrefix(code) code = loopWrapper(code) @@ -1172,6 +1249,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -1204,12 +1282,16 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -1261,12 +1343,16 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_src_reg_idx += setSrcVm() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa index 457c5ce40d..b997dbec97 100644 --- a/src/arch/riscv/isa/formats/vector_conf.isa +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -157,7 +157,8 @@ def template VConfExecute {{ tc->setMiscReg(MISCREG_VSTART, 0); - VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, vlen); + VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, + vlen); vlmax = new_vtype.vill ? 
0 : getVlmax(new_vtype, vlen); uint32_t new_vl = getNewVL( current_vl, requested_vl, vlmax, rd_bits, rs1_bits); diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa index 113250d5cf..da53d80d0a 100644 --- a/src/arch/riscv/isa/formats/vector_mem.isa +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -29,10 +29,15 @@ let {{ +def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" +def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" + def VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, base_class, postacc_code='', declare_template_base=VMemMacroDeclare, - decode_template=BasicDecode, exec_template_base='', + decode_template=VMemBaseDecodeBlock, exec_template_base='', # If it's a macroop, the corresponding microops will be # generated. is_macroop=True): @@ -63,7 +68,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, exec_template_base + 'MicroInst', {'ea_code': ea_code, 'memacc_code': memacc_code, - 'postacc_code': postacc_code}, + 'postacc_code': postacc_code, + 'set_vlenb': setVlenb(), + 'set_vlen': setVlen()}, inst_flags) if mem_flags: @@ -90,7 +97,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, def format VleOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -101,7 +110,9 @@ def format VleOp( def format VseOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -134,7 +145,9 @@ def format VsmOp( def format VlWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -145,7 +158,9 @@ def format VlWholeOp( def format VsWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -156,7 +171,9 @@ 
def format VsWholeOp( def format VlStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -167,7 +184,9 @@ def format VlStrideOp( def format VsStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -186,7 +205,7 @@ def format VlIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VlIndexMacroInst', exec_template_base='VlIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; @@ -200,6 +219,6 @@ def format VsIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VsIndexMacroInst', exec_template_base='VsIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 9b5ee0e7fa..306b1c53f1 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -35,8 +35,8 @@ output header {{ [[maybe_unused]] RiscvISA::vreg_t old_vd; \ [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ xc->getRegOperand(this, (idx), &old_vd); \ - old_Vd = old_vd.as >(); \ - memcpy(Vd, old_Vd, VLENB); + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, vlenb); #define VRM_REQUIRED \ uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ @@ -73,7 +73,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -82,14 +82,14 @@ public: def 
template VectorIntMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -121,7 +121,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -133,7 +133,7 @@ def template VectorIntMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -173,6 +173,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -190,7 +191,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -214,7 +215,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, @@ -254,13 +255,16 
@@ Fault xc->setMiscReg(MISCREG_STATUS, status); auto SEW = vtype_SEW(vtype); - auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + auto index = (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { case 8: { using vext [[maybe_unused]] = int8_t; using vextu [[maybe_unused]] = uint8_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -272,6 +276,8 @@ Fault using vextu [[maybe_unused]] = uint16_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -283,6 +289,8 @@ Fault using vextu [[maybe_unused]] = uint32_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -300,10 +308,10 @@ Fault def template VectorIntDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -316,7 +324,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -325,8 +333,8 @@ public: def template VectorIntWideningMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -337,7 +345,7 @@ template const uint32_t num_microops = 1 << std::max(0, vlmul + 1); 
int32_t tmp_vl = this->vl; - const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -370,7 +378,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -382,7 +390,7 @@ def template VectorIntWideningMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -415,6 +423,10 @@ Fault return std::make_shared( "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -423,13 +435,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -459,6 +469,11 @@ Fault "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -466,13 +481,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -485,9 +498,9 @@ Fault def template VectorIntWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -500,7 +513,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -508,14 +521,14 @@ public: def template VectorFloatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = 
vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -547,7 +560,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -557,7 +570,7 @@ public: def template VectorFloatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -598,6 +611,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -611,8 +625,8 @@ Fault def template VectorFloatDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -625,7 +639,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -650,7 +664,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override @@ -693,14 +707,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - 
const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -737,14 +754,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -757,7 +777,7 @@ Fault def template VectorFloatWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -771,7 +791,7 @@ private: int cnt = 0; %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -781,14 +801,14 @@ public: def template ViotaMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); 
StaticInstPtr microop; @@ -819,7 +839,7 @@ private: bool vm; int* cnt; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, int* cnt); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -831,7 +851,7 @@ def template ViotaMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, int* cnt) + uint32_t _microVl, uint8_t _microIdx, int* cnt) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -871,6 +891,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -919,6 +940,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -997,7 +1019,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1006,14 +1028,14 @@ public: def template VectorIntMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1028,7 +1050,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), 
_vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1050,7 +1072,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1061,7 +1083,7 @@ def template VectorIntMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1099,10 +1121,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1119,7 +1142,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1128,14 +1151,14 @@ public: def template VectorFloatMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1150,7 +1173,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new 
VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), _vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1171,7 +1194,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1182,7 +1205,7 @@ def template VectorFloatMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1220,10 +1243,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1276,7 +1300,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1287,7 +1311,7 @@ public: def template VMvWholeMicroConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1321,7 +1345,8 @@ Fault %(op_decl)s; %(op_rd)s; - for (size_t i = 0; i < (VLEN / 64); i++) { + %(set_vlen)s; + for (size_t i = 0; i < (vlen / 64); i++) { %(code)s; } %(op_wb)s; @@ -1382,6 +1407,7 @@ Fault %(op_decl)s; %(op_rd)s; // TODO: remove it + %(set_vlenb)s; %(copy_old_vd)s; %(code)s; %(op_wb)s; @@ -1489,6 +1515,28 @@ Fault }}; +def 
template VectorFloatNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + def template VectorReduceMacroDeclare {{ template @@ -1496,7 +1544,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1505,14 +1553,14 @@ public: def template VectorReduceMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1544,7 +1592,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1555,7 +1603,7 @@ def template VectorReduceMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, 
uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1593,6 +1641,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1600,7 +1650,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { microop_result = f(microop_result, Vs2[i]); } @@ -1625,6 +1676,7 @@ Fault %(type_def)s; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -1638,6 +1690,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1647,7 +1701,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1685,6 +1740,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1694,7 +1751,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1716,7 +1774,7 @@ class %(class_name)s : public %(base_class)s{ private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + 
%(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1725,8 +1783,9 @@ public: def template VectorGatherMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1740,7 +1799,8 @@ template const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul; const uint8_t vd_vregs = vs2_vregs; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs1_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1778,7 +1838,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1789,7 +1849,7 @@ def template VectorGatherMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1839,17 +1899,19 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; - const uint32_t vlmax = vtype_VLMAX(vtype); + const uint32_t vlmax = vtype_VLMAX(vtype,vlen); constexpr uint8_t vd_eewb = sizeof(ElemType); constexpr uint8_t vs1_eewb = sizeof(IndexType); constexpr uint8_t vs2_eewb = sizeof(ElemType); constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) 
/ vd_eewb; - [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; - [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; - [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const uint16_t vd_elems = vlenb / vd_eewb; + [[maybe_unused]] const uint16_t vs1_elems = vlenb / vs1_eewb; + [[maybe_unused]] const uint16_t vs2_elems = vlenb / vs2_eewb; [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; @@ -1875,19 +1937,19 @@ def template VectorGatherDecodeBlock {{ switch(machInst.vtype8.vsew) { case 0b000: { using elem_type [[maybe_unused]] = uint8_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b001: { using elem_type [[maybe_unused]] = uint16_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b010: { using elem_type [[maybe_unused]] = uint32_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b011: { using elem_type [[maybe_unused]] = uint64_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } default: GEM5_UNREACHABLE; } @@ -1902,7 +1964,7 @@ private: %(reg_idx_arr_decl)s; bool vxsat = false; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1911,14 +1973,14 @@ public: def template VectorIntVxsatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const 
int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1954,7 +2016,7 @@ private: bool vm; bool* vxsatptr; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1966,7 +2028,7 @@ def template VectorIntVxsatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -2007,6 +2069,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -2016,7 +2080,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]); } @@ -2038,7 +2103,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -2047,14 +2112,14 @@ public: def template VectorSlideUpMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t 
num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2082,14 +2147,14 @@ template def template VectorSlideDownMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2126,7 +2191,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -2138,7 +2203,7 @@ def template VectorSlideMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vdIdx, _vs2Idx) { @@ -2174,10 +2239,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen); + 
%(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -2210,10 +2278,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen); + %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index 2b3b9187bf..fc1b93548c 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -34,6 +34,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -48,6 +49,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -55,16 +57,17 @@ public: def template VleConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; if (micro_vl == 0) { @@ -72,7 +75,7 @@ def template VleConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); 
microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -93,9 +96,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, - _microIdx) + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -127,12 +131,15 @@ Fault Addr EA; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; RiscvISA::vreg_t tmp_v0; uint8_t *v0; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -150,15 +157,18 @@ Fault } uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); if (fault != NoFault) return fault; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; %(memacc_code)s; @@ -176,10 +186,12 @@ Fault %(class_name)s::initiateAcc(ExecContext* xc, trace::InstRecord* traceData) const { + Addr EA; %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; MISA misa = xc->readMiscReg(MISCREG_ISA); @@ -192,6 +204,7 @@ Fault return std::make_shared("VILL is set", machInst); uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = initiateMemRead(xc, 
EA, mem_size, memAccessFlags, byte_enable); @@ -208,6 +221,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -222,8 +236,9 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; @@ -238,13 +253,13 @@ Fault def template VseConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); @@ -256,7 +271,7 @@ def template VseConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -277,9 +292,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[0]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", 
_machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -326,9 +342,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -375,9 +393,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -412,20 +432,20 @@ Fault def template VlmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop = new Vle8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); } @@ -439,20 +459,20 @@ def template VlmConstructor {{ def template VsmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; 
%(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new Vse8_vMicro(_machInst, micro_vl, 0); + microop = new Vse8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); } @@ -466,18 +486,18 @@ def template VsmConstructor {{ def template VsWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -497,9 +517,10 @@ private: RegId destRegIdxArr[0]; RegId srcRegIdxArr[2]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -532,9 +553,11 @@ Fault } %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } @@ -560,9 +583,11 @@ Fault } 
%(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } @@ -586,18 +611,19 @@ Fault def template VlWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -617,9 +643,10 @@ private: RegId destRegIdxArr[1]; RegId srcRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s_micro", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -657,6 +684,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; Fault fault = readMemAtomicLE(xc, traceData, EA, @@ -664,7 +693,7 @@ Fault if (fault != NoFault) return fault; - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; i++) { %(memacc_code)s; } @@ -690,6 +719,7 @@ Fault } %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); @@ -706,6 
+736,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -713,7 +744,7 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; ++i) { %(memacc_code)s; } @@ -726,13 +757,13 @@ Fault def template VlStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -770,7 +801,7 @@ private: RegId destRegIdxArr[1]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _regIdx, _microIdx, _microVl) { @@ -820,6 +851,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -833,7 +865,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); @@ -866,6 +898,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size 
@@ -877,7 +910,7 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -894,6 +927,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -920,12 +954,12 @@ Fault memcpy(Vd, old_Vd, microVl * elem_size); // treat vta as vtu // if (machInst.vtype8.vta == 0) - memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size); } else { - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); } - size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -939,13 +973,13 @@ Fault def template VsStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -983,7 +1017,7 @@ private: RegId destRegIdxArr[0]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, 
_regIdx, _microIdx, _microVl) { @@ -1025,6 +1059,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; @@ -1038,7 +1073,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1074,11 +1109,13 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_store = machInst.vm || elem_mask(v0, ei); if (need_store) { const std::vector byte_enable(mem_size, need_store); @@ -1105,8 +1142,8 @@ Fault def template VlIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1115,7 +1152,8 @@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1212,6 +1250,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vd[0]); RiscvISA::vreg_t tmp_v0; @@ -1223,8 +1262,7 @@ 
Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); @@ -1259,6 +1297,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -1270,7 +1309,8 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -1293,10 +1333,11 @@ Fault using vu = std::make_unsigned_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); - RiscvISA::vreg_t old_vd; + RiscvISA::vreg_t old_vd; decltype(Vd) old_Vd = nullptr; // We treat agnostic as undistrubed xc->getRegOperand(this, 2, &old_vd); @@ -1309,9 +1350,9 @@ Fault v0 = tmp_v0.as(); } - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -1326,8 +1367,8 @@ Fault def template VsIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1336,7 +1377,8
@@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; - const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vs3_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1426,6 +1468,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1438,7 +1481,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1469,6 +1512,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1481,7 +1525,7 @@ Fault constexpr uint8_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1504,6 +1548,10 @@ Fault }}; +def template VMemBaseDecodeBlock {{ + return new %(class_name)s(machInst, vlen); +}}; + def template VMemTemplateDecodeBlock {{ switch(machInst.vtype8.vsew) { @@ -1523,3 +1571,23 @@ switch(machInst.vtype8.vsew) { } }}; + +def template VMemSplitTemplateDecodeBlock {{ + 
+switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s<uint8_t>(machInst, vlen); + } + case 0b001: { + return new %(class_name)s<uint16_t>(machInst, vlen); + } + case 0b010: { + return new %(class_name)s<uint32_t>(machInst, vlen); + } + case 0b011: { + return new %(class_name)s<uint64_t>(machInst, vlen); + } + default: GEM5_UNREACHABLE; +} + +}}; diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh index 03a7fc415f..91fb507034 100644 --- a/src/arch/riscv/pcstate.hh +++ b/src/arch/riscv/pcstate.hh @@ -62,7 +62,7 @@ class PCState : public GenericISA::UPCState<4> bool _compressed = false; RiscvType _rvType = RV64; - uint64_t _vlenb = 256; + uint64_t _vlenb = 32; VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial; uint32_t _vl = 0; diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index 01c600d148..c7edffc2f7 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -42,7 +42,6 @@ #ifndef __ARCH_RISCV_TYPES_HH__ #define __ARCH_RISCV_TYPES_HH__ -#include "arch/riscv/pcstate.hh" #include "base/bitunion.hh" namespace gem5 diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 40054aec0f..bac499e523 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -268,12 +268,13 @@ vtype_SEW(const uint64_t vtype) * Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h */ inline uint64_t -vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +vtype_VLMAX(const uint64_t vtype, const uint64_t vlen, + const bool per_reg = false) { int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); lmul = per_reg ? std::min(0, lmul) : lmul; int64_t vsew = bits(vtype, 5, 3); - return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); + return vlen >> (vsew + 3 - lmul); } inline int64_t