diff --git a/src/arch/generic/memhelpers.hh b/src/arch/generic/memhelpers.hh index d5684a6af9..9cdd2a56eb 100644 --- a/src/arch/generic/memhelpers.hh +++ b/src/arch/generic/memhelpers.hh @@ -124,6 +124,24 @@ readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, return fault; } +/// Read from memory in atomic mode. +template <ByteOrder Order, class XC, class MemT> +Fault +readMemAtomic(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, + size_t size, Request::Flags flags) +{ + memset(&mem, 0, size); + const std::vector<bool> byte_enable(size, true); + Fault fault = readMemAtomic(xc, addr, (uint8_t*)&mem, + size, flags, byte_enable); + if (fault == NoFault) { + mem = gtoh(mem, Order); + if (traceData) + traceData->setData(mem); + } + return fault; +} + template Fault readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, @@ -133,6 +151,16 @@ readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, xc, traceData, addr, mem, flags); } +template <class XC, class MemT> +Fault +readMemAtomicLE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, + size_t size, Request::Flags flags) +{ + return readMemAtomic<ByteOrder::little>( + xc, traceData, addr, mem, size, flags); +} + + template Fault readMemAtomicBE(XC *xc, trace::InstRecord *traceData, Addr addr, MemT &mem, @@ -165,6 +193,20 @@ writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, sizeof(MemT), flags, res, byte_enable); } +template <ByteOrder Order, class XC, class MemT> +Fault +writeMemTiming(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, + size_t size, Request::Flags flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(mem); + } + mem = htog(mem, Order); + const std::vector<bool> byte_enable(size, true); + return writeMemTiming(xc, (uint8_t*)&mem, addr, + size, flags, res, byte_enable); +} + template Fault writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, @@ -174,6 +216,15 @@ writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, xc, traceData, mem, addr, flags,
res); } +template <class XC, class MemT> +Fault +writeMemTimingLE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, + size_t size, Request::Flags flags, uint64_t *res) +{ + return writeMemTiming<ByteOrder::little>( + xc, traceData, mem, addr, size, flags, res); +} + template Fault writeMemTimingBE(XC *xc, trace::InstRecord *traceData, MemT mem, Addr addr, @@ -214,6 +265,27 @@ writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem, return fault; } +template <ByteOrder Order, class XC, class MemT> +Fault +writeMemAtomic(XC *xc, trace::InstRecord *traceData, const MemT &mem, + Addr addr, size_t size, Request::Flags flags, uint64_t *res) +{ + if (traceData) { + traceData->setData(mem); + } + MemT host_mem = htog(mem, Order); + const std::vector<bool> byte_enable(size, true); + Fault fault = writeMemAtomic(xc, (uint8_t*)&host_mem, + addr, size, flags, res, byte_enable); + if (fault == NoFault && res != NULL) { + if (flags & Request::MEM_SWAP || flags & Request::MEM_SWAP_COND) + *(MemT *)res = gtoh(*(MemT *)res, Order); + else + *res = gtoh(*res, Order); + } + return fault; +} + template Fault writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem, @@ -223,6 +295,15 @@ writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem, xc, traceData, mem, addr, flags, res); } +template <class XC, class MemT> +Fault +writeMemAtomicLE(XC *xc, trace::InstRecord *traceData, const MemT &mem, + Addr addr, size_t size, Request::Flags flags, uint64_t *res) +{ + return writeMemAtomic<ByteOrder::little>( + xc, traceData, mem, addr, size, flags, res); +} + template Fault writeMemAtomicBE(XC *xc, trace::InstRecord *traceData, const MemT &mem, diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py index f66171a95a..3f123405e9 100644 --- a/src/arch/riscv/RiscvISA.py +++ b/src/arch/riscv/RiscvISA.py @@ -38,11 +38,37 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from m5.params import Enum +from m5.params import Enum, UInt32 from m5.params import Param from m5.objects.BaseISA import BaseISA +class RiscvVectorLength(UInt32): + min = 8 + max = 65536 + + def _check(self): + super()._check() + + # VLEN needs to be a whole power of 2. We already know value is + # not zero. Hence: + if self.value & (self.value - 1) != 0: + raise TypeError("VLEN is not a power of 2: %d" % self.value) + + +class RiscvVectorElementLength(UInt32): + min = 8 + max = 64 + + def _check(self): + super()._check() + + # ELEN needs to be a whole power of 2. We already know value is + # not zero. Hence: + if self.value & (self.value - 1) != 0: + raise TypeError("ELEN is not a power of 2: %d" % self.value) + + class RiscvType(Enum): vals = ["RV32", "RV64"] @@ -58,3 +84,13 @@ class RiscvISA(BaseISA): riscv_type = Param.RiscvType("RV64", "RV32 or RV64") enable_rvv = Param.Bool(True, "Enable vector extension") + vlen = Param.RiscvVectorLength( + 256, + "Length of each vector register in bits. \ + VLEN in Ch. 2 of RISC-V vector spec", + ) + elen = Param.RiscvVectorElementLength( + 64, + "Length of each vector element in bits. \ + ELEN in Ch. 2 of RISC-V vector spec", + ) diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index b1e2948e93..ee5d313587 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -41,6 +41,9 @@ namespace RiscvISA Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst) { + ISA *isa = dynamic_cast(p.isa); + vlen = isa->getVecLenInBits(); + elen = isa->getVecElemLenInBits(); reset(); } diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index c827e85f90..bf863fda22 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -60,6 +60,9 @@ class Decoder : public InstDecoder ExtMachInst emi; uint32_t machInst; + uint32_t vlen; + uint32_t elen; + virtual StaticInstPtr decodeInst(ExtMachInst mach_inst); /// Decode a machine instruction. 
diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 6ecec44dc5..c99e806e9b 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -215,8 +215,9 @@ std::string VleMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " << registerName(srcRegIdx(1)); if (!machInst.vm) ss << ", v0.t"; return ss.str(); @@ -226,8 +227,9 @@ std::string VlWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } @@ -235,8 +237,9 @@ std::string VseMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; if (!machInst.vm) ss << ", v0.t"; return ss.str(); } @@ -245,8 +248,9 @@ std::string VsWholeMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { std::stringstream ss; + unsigned vlenb = vlen >> 3; ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " - << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + << vlenb * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; return ss.str(); } diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index b25a6e3a09..c986c99c72 100644 --- a/src/arch/riscv/insts/vector.hh +++ 
b/src/arch/riscv/insts/vector.hh @@ -32,6 +32,7 @@ #include #include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/isa.hh" #include "arch/riscv/regs/misc.hh" #include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" @@ -68,12 +69,15 @@ class VConfOp : public RiscvStaticInst uint64_t zimm10; uint64_t zimm11; uint64_t uimm; - VConfOp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + uint32_t elen; + VConfOp(const char *mnem, ExtMachInst _extMachInst, + uint32_t _elen, OpClass __opClass) : RiscvStaticInst(mnem, _extMachInst, __opClass), bit30(_extMachInst.bit30), bit31(_extMachInst.bit31), zimm10(_extMachInst.zimm_vsetivli), zimm11(_extMachInst.zimm_vsetvli), - uimm(_extMachInst.uimm_vsetivli) + uimm(_extMachInst.uimm_vsetivli), + elen(_elen) { this->flags[IsVector] = true; } @@ -116,11 +120,14 @@ class VectorMacroInst : public RiscvMacroInst protected: uint32_t vl; uint8_t vtype; + uint32_t vlen; + VectorMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) + OpClass __opClass, uint32_t _vlen = 256) : RiscvMacroInst(mnem, _machInst, __opClass), vl(_machInst.vl), - vtype(_machInst.vtype8) + vtype(_machInst.vtype8), + vlen(_vlen) { this->flags[IsVector] = true; } @@ -128,13 +135,15 @@ class VectorMacroInst : public RiscvMacroInst class VectorMicroInst : public RiscvMicroInst { - protected: - uint8_t microVl; +protected: + uint32_t vlen; + uint32_t microVl; uint8_t microIdx; uint8_t vtype; VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen = 256) : RiscvMicroInst(mnem, _machInst, __opClass), + vlen(_vlen), microVl(_microVl), microIdx(_microIdx), vtype(_machInst.vtype8) @@ -169,7 +178,7 @@ class VectorArithMicroInst : public VectorMicroInst { protected: VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t 
_microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -182,12 +191,11 @@ class VectorArithMacroInst : public VectorMacroInst { protected: VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } - std::string generateDisassembly( Addr pc, const loader::SymbolTable *symtab) const override; }; @@ -196,7 +204,7 @@ class VectorVMUNARY0MicroInst : public VectorMicroInst { protected: VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -209,8 +217,8 @@ class VectorVMUNARY0MacroInst : public VectorMacroInst { protected: VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -223,8 +231,8 @@ class VectorSlideMacroInst : public VectorMacroInst { protected: VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) { this->flags[IsVector] = true; } @@ -239,7 +247,7 @@ class VectorSlideMicroInst : public VectorMicroInst uint8_t vdIdx; uint8_t vs2Idx; VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , vdIdx(_vdIdx), vs2Idx(_vs2Idx) @@ -256,7 +264,7 @@ class VectorMemMicroInst : public 
VectorMicroInst Request::Flags memAccessFlags; VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, uint32_t _offset) : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) , offset(_offset) @@ -268,8 +276,8 @@ class VectorMemMacroInst : public VectorMacroInst { protected: VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen = 256) + : VectorMacroInst(mnem, _machInst, __opClass, _vlen) {} }; @@ -277,8 +285,8 @@ class VleMacroInst : public VectorMemMacroInst { protected: VleMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -289,8 +297,8 @@ class VseMacroInst : public VectorMemMacroInst { protected: VseMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -302,9 +310,10 @@ class VleMicroInst : public VectorMicroInst protected: Request::Flags memAccessFlags; - VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + VleMicroInst(const char *mnem, ExtMachInst _machInst,OpClass __opClass, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsLoad] = true; } @@ -319,8 +328,9 @@ class VseMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VseMicroInst(const char *mnem, ExtMachInst _machInst, 
OpClass __opClass, - uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) { this->flags[IsStore] = true; } @@ -333,8 +343,8 @@ class VlWholeMacroInst : public VectorMemMacroInst { protected: VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -347,8 +357,10 @@ class VlWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx, + uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -359,8 +371,8 @@ class VsWholeMacroInst : public VectorMemMacroInst { protected: VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -373,8 +385,10 @@ class VsWholeMicroInst : public VectorMicroInst Request::Flags memAccessFlags; VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) - : VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx) + OpClass __opClass, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : VectorMicroInst(mnem, _machInst, __opClass , _microVl, + _microIdx, _vlen) {} std::string generateDisassembly( @@ -385,8 +399,8 @@ class VlStrideMacroInst : public 
VectorMemMacroInst { protected: VlStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -399,7 +413,7 @@ class VlStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -413,8 +427,8 @@ class VsStrideMacroInst : public VectorMemMacroInst { protected: VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -427,7 +441,7 @@ class VsStrideMicroInst : public VectorMemMicroInst uint8_t regIdx; VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, - uint8_t _microIdx, uint8_t _microVl) + uint8_t _microIdx, uint32_t _microVl) : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx, 0) , regIdx(_regIdx) @@ -441,8 +455,8 @@ class VlIndexMacroInst : public VectorMemMacroInst { protected: VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, _vlen) {} std::string generateDisassembly( @@ -473,8 +487,8 @@ class VsIndexMacroInst : public VectorMemMacroInst { protected: VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, - OpClass __opClass) - : VectorMemMacroInst(mnem, _machInst, __opClass) + OpClass __opClass, uint32_t _vlen) + : VectorMemMacroInst(mnem, _machInst, __opClass, 
_vlen) {} std::string generateDisassembly( @@ -516,7 +530,7 @@ class VMvWholeMicroInst : public VectorArithMicroInst { protected: VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, - OpClass __opClass, uint8_t _microVl, + OpClass __opClass, uint32_t _microVl, uint8_t _microIdx) : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) {} @@ -533,10 +547,12 @@ class VMaskMergeMicroInst : public VectorArithMicroInst RegId destRegIdxArr[1]; public: - VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, - uint8_t _numSrcs) + uint32_t vlen; + VMaskMergeMicroInst(ExtMachInst extMachInst, + uint8_t _dstReg, uint8_t _numSrcs, uint32_t _vlen) : VectorArithMicroInst("vmask_mv_micro", extMachInst, - VectorIntegerArithOp, 0, 0) + VectorIntegerArithOp, 0, 0), + vlen(_vlen) { setRegIdxArrays( reinterpret_cast( @@ -558,26 +574,28 @@ class VMaskMergeMicroInst : public VectorArithMicroInst execute(ExecContext* xc, trace::InstRecord* traceData) const override { vreg_t& tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + // VLEN is a member of this inst; no PC-state clone() (which was leaked) needed. auto Vd = tmp_d0.as(); - constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + uint32_t vlenb = vlen >> 3; + const uint32_t elems_per_vreg = vlenb / sizeof(ElemType); size_t bit_cnt = elems_per_vreg; vreg_t tmp_s; xc->getRegOperand(this, 0, &tmp_s); auto s = tmp_s.as(); // cp the first result and tail - memcpy(Vd, s, VLENB); + memcpy(Vd, s, vlenb); for (uint8_t i = 1; i < this->_numSrcRegs; i++) { xc->getRegOperand(this, i, &tmp_s); s = tmp_s.as(); - if constexpr (elems_per_vreg < 8) { - constexpr uint8_t m = (1 << elems_per_vreg) - 1; - const uint8_t mask = m << (i * elems_per_vreg % 8); + if (elems_per_vreg < 8) { + const uint32_t m = (1 << elems_per_vreg) - 1; + const uint32_t mask = m << (i * elems_per_vreg % 8); // clr & ext bits Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; bit_cnt += elems_per_vreg; } else { - constexpr
uint8_t byte_offset = elems_per_vreg / 8; + const uint32_t byte_offset = elems_per_vreg / 8; memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); } } @@ -595,7 +613,8 @@ class VMaskMergeMicroInst : public VectorArithMicroInst for (uint8_t i = 0; i < this->_numSrcRegs; i++) { ss << ", " << registerName(srcRegIdx(i)); } - ss << ", offset:" << VLENB / sizeof(ElemType); + unsigned vlenb = vlen >> 3; + ss << ", offset:" << vlenb / sizeof(ElemType); return ss.str(); } }; diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index a1f4056bf5..877b795551 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -36,6 +36,7 @@ #include #include "arch/riscv/faults.hh" +#include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/interrupts.hh" #include "arch/riscv/mmu.hh" #include "arch/riscv/pagetable.hh" @@ -253,10 +254,9 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace -ISA::ISA(const Params &p) : - BaseISA(p), _rvType(p.riscv_type), checkAlignment(p.check_alignment), - enableRvv(p.enable_rvv) - +ISA::ISA(const Params &p) : BaseISA(p), + _rvType(p.riscv_type), checkAlignment(p.check_alignment), + enableRvv(p.enable_rvv), vlen(p.vlen), elen(p.elen) { _regClasses.push_back(&intRegClass); _regClasses.push_back(&floatRegClass); @@ -267,6 +267,14 @@ ISA::ISA(const Params &p) : _regClasses.push_back(&ccRegClass); _regClasses.push_back(&miscRegClass); + fatal_if(p.vlen < p.elen, + "VLEN should be greater or equal " + "than ELEN. Ch. 2 RISC-V vector spec."); + + inform("RVV enabled, VLEN = %d bits, ELEN = %d bits", + p.vlen, p.elen); + + miscRegFile.resize(NUM_MISCREGS); clear(); } diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index 13366ef4c3..5581c3b677 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -84,6 +84,16 @@ class ISA : public BaseISA const Addr INVALID_RESERVATION_ADDR = (Addr)-1; std::unordered_map load_reservation_addrs; + /** Length of each vector register in bits.
+ * VLEN in Ch. 2 of RISC-V vector spec + */ + unsigned vlen; + + /** Length of each vector element in bits. + * ELEN in Ch. 2 of RISC-V vector spec + */ + unsigned elen; + public: using Params = RiscvISAParams; @@ -92,7 +102,8 @@ class ISA : public BaseISA PCStateBase* newPCState(Addr new_inst_addr=0) const override { - return new PCState(new_inst_addr, _rvType, VLENB); + unsigned vlenb = vlen >> 3; + return new PCState(new_inst_addr, _rvType, vlenb); } public: @@ -147,6 +158,10 @@ class ISA : public BaseISA Addr& load_reservation_addr = load_reservation_addrs[cid]; load_reservation_addr = INVALID_RESERVATION_ADDR; } + /** Methods for getting VLEN, VLENB and ELEN values */ + unsigned getVecLenInBits() { return vlen; } + unsigned getVecLenInBytes() { return vlen >> 3; } + unsigned getVecElemLenInBits() { return elen; } }; } // namespace RiscvISA diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2bd3d33a7e..3d1d396165 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -3110,21 +3110,33 @@ decode QUADRANT default Unknown::unknown() { 0x12: decode VS1 { format VectorIntExtFormat { 0x02: vzext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x03: vsext_vf8({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x04: vzext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x05: vsext_vf4({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x06: vzext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vu[i] = Vs2_vextu[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); 0x07: vsext_vf2({{ + auto offset = (vlen / SEW) * index; + Vd_vi[i] = Vs2_vext[i + offset]; }}, OPMVV, VectorIntegerExtensionOp); } @@ -3185,7 +3197,8 @@ decode QUADRANT default 
Unknown::unknown() { auto Vs2bit = tmp_s2.as(); for (uint32_t i = 0; i < this->microVl; i++) { uint32_t ei = i + - vtype_VLMAX(vtype, true) * this->microIdx; + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; bool vs2_lsb = elem_mask(Vs2bit, ei); bool do_mask = elem_mask(v0, ei); bool has_one = false; @@ -3406,7 +3419,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0e: VectorSlideUpFormat::vslideup_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3431,7 +3445,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVI, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vi({{ const int offset = (int)(uint64_t)(SIMM5); - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3662,7 +3677,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslideup_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3687,7 +3703,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslidedown_vx({{ const int offset = (int)Rs1_vu; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int 
offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -3964,7 +3981,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -3994,7 +4012,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPFVF, VectorMiscOp); 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); @@ -4239,7 +4258,8 @@ decode QUADRANT default Unknown::unknown() { } 0x0e: VectorSlideUpFormat::vslide1up_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vdIdx - vs2Idx; const int offsetInVreg = offset - vregOffset * microVlmax; if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { @@ -4269,7 +4289,8 @@ decode QUADRANT default Unknown::unknown() { }}, OPIVX, VectorMiscOp); 0x0f: VectorSlideDownFormat::vslide1down_vx({{ const int offset = 1; - const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int microVlmax = vtype_VLMAX(machInst.vtype8, + vlen, true); const int vregOffset = vs2Idx - vdIdx; const int offsetInVreg = offset - vregOffset * microVlmax; const int numVs2s = vtype_regs_per_group(vtype); diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa index c462e6c8d4..0d5055ea8f 100644 --- 
a/src/arch/riscv/isa/formats/vector_arith.isa +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -28,6 +28,10 @@ let {{ + def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" + def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" def setDestWrapper(destRegId): return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \ "_numTypedDestRegs[VecRegClass]++;\n" @@ -67,7 +71,7 @@ let {{ ''' + code else: return ''' - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx; ''' + code def wideningOpRegisterConstraintChecks(code): @@ -178,12 +182,15 @@ def format VectorIntFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -225,12 +232,17 @@ def format VectorIntExtFormat(code, category, *flags) {{ code = loopWrapper(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'ext_div': ext_div}, @@ -293,12 +305,17 @@ def format VectorIntWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': 
copyOldVd(old_vd_idx)}, flags) @@ -348,12 +365,17 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{ code = narrowingOpRegisterConstraintChecks(code) vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), }, @@ -416,12 +438,15 @@ def format VectorIntMaskFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -474,12 +499,17 @@ def format VectorGatherFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx), 'idx_type': idx_type}, @@ -537,12 +567,15 @@ def format VectorFloatFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -581,12 +614,15 @@ def format VectorFloatCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + 
microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -649,12 +685,17 @@ def format VectorFloatWideningFormat(code, category, *flags) {{ if v0_required: vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -693,12 +734,17 @@ def format VectorFloatWideningCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -738,12 +784,17 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb(); + set_vlen = setVlen(); + microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -783,6 +834,7 @@ def format VectorFloatMaskFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() code = maskCondWrapper(code) code = eiDeclarePrefix(code) @@ -795,6 +847,7 @@ def format 
VectorFloatMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -816,7 +869,8 @@ def format VMvWholeFormat(code, category, *flags) {{ microiop = InstObjParams(name + "_micro", Name + "Micro", 'VMvWholeMicroInst', - {'code': code}, + {'code': code, + 'set_vlen': setVlen()}, flags) header_output = \ @@ -847,6 +901,7 @@ def format ViotaFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() microiop = InstObjParams(name+"_micro", Name+"Micro", @@ -854,6 +909,7 @@ def format ViotaFormat(code, category, *flags){{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -885,12 +941,14 @@ def format Vector1Vs1VdMaskFormat(code, category, *flags){{ set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_vm_idx = setSrcVm() + set_vlenb = setVlenb() iop = InstObjParams(name, Name, 'VectorNonSplitInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'set_vm_idx': set_vm_idx, 'copy_old_vd': copyOldVd(1)}, @@ -946,10 +1004,10 @@ def format VectorNonSplitFormat(code, category, *flags) {{ if inst_name == "vfmv" : execute_block = VectorFloatNonSplitExecute.subst(iop) - decode_block = VectorFloatDecodeBlock.subst(iop) + decode_block = VectorFloatNonSplitDecodeBlock.subst(iop) elif inst_name == "vmv" : execute_block = VectorIntNonSplitExecute.subst(iop) - decode_block = VectorIntDecodeBlock.subst(iop) + decode_block = VectorIntNonSplitDecodeBlock.subst(iop) else : error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) @@ -984,6 +1042,8 @@ def 
format VectorMaskFormat(code, category, *flags) {{ set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_vlenb = setVlenb() + code = loopWrapper(code, micro_inst = False) iop = InstObjParams(name, @@ -992,6 +1052,7 @@ def format VectorMaskFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) # Because of the use of templates, we had to put all parts in header to @@ -1020,6 +1081,9 @@ def format VectorReduceIntFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using vu [[maybe_unused]] = std::make_unsigned_t; using vi [[maybe_unused]] = std::make_signed_t; @@ -1030,6 +1094,8 @@ def format VectorReduceIntFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb' : set_vlenb, + 'set_vlen' : set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1062,6 +1128,9 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() + type_def = ''' using et = ElemType; using vu = decltype(et::v); @@ -1075,6 +1144,8 @@ def format VectorReduceFloatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1107,6 +1178,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = 
setVlenb() + set_vlen = setVlen() type_def = ''' using et = ElemType; using vu [[maybe_unused]] = decltype(et::v); @@ -1119,6 +1192,8 @@ def format VectorReduceFloatWideningFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'type_def': type_def, 'copy_old_vd': copyOldVd(2)}, @@ -1162,6 +1237,8 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + code = maskCondWrapper(code) code = eiDeclarePrefix(code) code = loopWrapper(code) @@ -1172,6 +1249,7 @@ def format VectorIntVxsatFormat(code, category, *flags) {{ {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(old_vd_idx)}, flags) @@ -1204,12 +1282,16 @@ def format VectorReduceIntWideningFormat(code, category, *flags) {{ set_src_reg_idx += setSrcWrapper(old_dest_reg_id) set_src_reg_idx += setSrcVm() vm_decl_rd = vmDeclAndReadData() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", 'VectorArithMicroInst', {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': copyOldVd(2)}, flags) @@ -1261,12 +1343,16 @@ def VectorSlideBase(name, Name, category, code, flags, macro_construtor, set_dest_reg_idx = setDestWrapper(dest_reg_id) vm_decl_rd = vmDeclAndReadData() set_src_reg_idx += setSrcVm() + set_vlenb = setVlenb() + set_vlen = setVlen() microiop = InstObjParams(name + "_micro", Name + "Micro", microop_class_name, {'code': code, 'set_dest_reg_idx': set_dest_reg_idx, 'set_src_reg_idx': set_src_reg_idx, + 'set_vlenb': set_vlenb, + 'set_vlen': set_vlen, 'vm_decl_rd': vm_decl_rd, 'copy_old_vd': 
copyOldVd(old_vd_idx)}, flags) diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa index 457c5ce40d..6280e5679b 100644 --- a/src/arch/riscv/isa/formats/vector_conf.isa +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -42,8 +42,8 @@ def format VConfOp(code, write_code, declare_class, branch_class, *flags) {{ branchTargetTemplate = eval(branch_class) header_output = declareTemplate.subst(iop) - decoder_output = BasicConstructor.subst(iop) - decode_block = BasicDecode.subst(iop) + decoder_output = VConfConstructor.subst(iop) + decode_block = VConfDecodeBlock.subst(iop) exec_output = VConfExecute.subst(iop) + branchTargetTemplate.subst(iop) }}; @@ -61,7 +61,7 @@ def template VSetVlDeclare {{ public: /// Constructor. - %(class_name)s(ExtMachInst machInst); + %(class_name)s(ExtMachInst machInst, uint32_t elen); Fault execute(ExecContext *, trace::InstRecord *) const override; std::unique_ptr branchTarget( ThreadContext *tc) const override; @@ -86,7 +86,7 @@ def template VSetiVliDeclare {{ public: /// Constructor. 
- %(class_name)s(ExtMachInst machInst); + %(class_name)s(ExtMachInst machInst, uint32_t elen); Fault execute(ExecContext *, trace::InstRecord *) const override; std::unique_ptr branchTarget( const PCStateBase &branch_pc) const override; @@ -97,6 +97,19 @@ def template VSetiVliDeclare {{ }; }}; +def template VConfConstructor {{ +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _elen) + : %(base_class)s("%(mnemonic)s", _machInst, _elen, %(op_class)s) + { + %(set_reg_idx_arr)s; + %(constructor)s; + } +}}; + +def template VConfDecodeBlock {{ + return new %(class_name)s(machInst,elen); +}}; + def template VConfExecute {{ VTYPE %(class_name)s::getNewVtype( @@ -112,7 +125,7 @@ def template VConfExecute {{ uint32_t newVill = !(vflmul >= 0.125 && vflmul <= 8) || - sew > std::min(vflmul, 1.0f) * ELEN || + sew > std::min(vflmul, 1.0f) * elen || bits(reqVtype, 62, 8) != 0; if (newVill) { newVtype = 0; @@ -157,7 +170,8 @@ def template VConfExecute {{ tc->setMiscReg(MISCREG_VSTART, 0); - VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, vlen); + VTYPE new_vtype = getNewVtype(Vtype, requested_vtype, + vlen); vlmax = new_vtype.vill ? 0 : getVlmax(new_vtype, vlen); uint32_t new_vl = getNewVL( current_vl, requested_vl, vlmax, rd_bits, rs1_bits); diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa index 113250d5cf..da53d80d0a 100644 --- a/src/arch/riscv/isa/formats/vector_mem.isa +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -29,10 +29,15 @@ let {{ +def setVlen(): + return "uint32_t vlen = VlenbBits * 8;\n" +def setVlenb(): + return "uint32_t vlenb = VlenbBits;\n" + def VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, base_class, postacc_code='', declare_template_base=VMemMacroDeclare, - decode_template=BasicDecode, exec_template_base='', + decode_template=VMemBaseDecodeBlock, exec_template_base='', # If it's a macroop, the corresponding microops will be # generated. 
is_macroop=True): @@ -63,7 +68,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, exec_template_base + 'MicroInst', {'ea_code': ea_code, 'memacc_code': memacc_code, - 'postacc_code': postacc_code}, + 'postacc_code': postacc_code, + 'set_vlenb': setVlenb(), + 'set_vlen': setVlen()}, inst_flags) if mem_flags: @@ -90,7 +97,9 @@ def VMemBase(name, Name, ea_code, memacc_code, mem_flags, def format VleOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -101,7 +110,9 @@ def format VleOp( def format VseOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -134,7 +145,9 @@ def format VsmOp( def format VlWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -145,7 +158,9 @@ def format VlWholeOp( def format VsWholeOp( memacc_code, - ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + ea_code={{ + EA = Rs1 + vlenb * microIdx; + }}, mem_flags=[], inst_flags=[] ) {{ @@ -156,7 +171,9 @@ def format VsWholeOp( def format VlStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -167,7 +184,9 @@ def format VlStrideOp( def format VsStrideOp( memacc_code, - ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + ea_code={{ + EA = Rs1 + Rs2 * (regIdx * vlenb / elem_size + microIdx); + }}, mem_flags=[], inst_flags=[] ) {{ @@ -186,7 +205,7 @@ def format VlIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VlIndexMacroInst', exec_template_base='VlIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; @@ 
-200,6 +219,6 @@ def format VsIndexOp( VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, 'VsIndexMacroInst', exec_template_base='VsIndex', declare_template_base=VMemTemplateMacroDeclare, - decode_template=VMemTemplateDecodeBlock + decode_template=VMemSplitTemplateDecodeBlock ) }}; diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa index 9b5ee0e7fa..306b1c53f1 100644 --- a/src/arch/riscv/isa/templates/vector_arith.isa +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -35,8 +35,8 @@ output header {{ [[maybe_unused]] RiscvISA::vreg_t old_vd; \ [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ xc->getRegOperand(this, (idx), &old_vd); \ - old_Vd = old_vd.as >(); \ - memcpy(Vd, old_Vd, VLENB); + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, vlenb); #define VRM_REQUIRED \ uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ @@ -73,7 +73,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -82,14 +82,14 @@ public: def template VectorIntMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -121,7 +121,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, 
uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -133,7 +133,7 @@ def template VectorIntMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -173,6 +173,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -190,7 +191,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -214,7 +215,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, @@ -254,13 +255,16 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); auto SEW = vtype_SEW(vtype); - auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + auto index = (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { case 8: { using vext [[maybe_unused]] = int8_t; using vextu [[maybe_unused]] = uint8_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -272,6 +276,8 @@ Fault using vextu [[maybe_unused]] = uint16_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -283,6 +289,8 @@ Fault using vextu [[maybe_unused]] = uint32_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -300,10 +308,10 @@ Fault def template VectorIntDecodeBlock {{ 
switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -316,7 +324,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -325,8 +333,8 @@ public: def template VectorIntWideningMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -337,7 +345,7 @@ template const uint32_t num_microops = 1 << std::max(0, vlmul + 1); int32_t tmp_vl = this->vl; - const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? 
t_micro_vlmax : t_micro_vlmax / 2; int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -370,7 +378,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -382,7 +390,7 @@ def template VectorIntWideningMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -415,6 +423,10 @@ Fault return std::make_shared( "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -423,13 +435,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -459,6 +469,11 @@ Fault "RVV is disabled or VPU is off", machInst); } + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + if (machInst.vill) return std::make_shared("VILL is set", machInst); @@ -466,13 +481,11 @@ Fault xc->setMiscReg(MISCREG_STATUS, status); const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? 
t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -485,9 +498,9 @@ Fault def template VectorIntWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b000: return new %(class_name)s(machInst); -case 0b001: return new %(class_name)s(machInst); -case 0b010: return new %(class_name)s(machInst); +case 0b000: return new %(class_name)s(machInst, vlen); +case 0b001: return new %(class_name)s(machInst, vlen); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -500,7 +513,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -508,14 +521,14 @@ public: def template VectorFloatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -547,7 +560,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -557,7 +570,7 @@ public: def template VectorFloatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t 
_microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -598,6 +611,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -611,8 +625,8 @@ Fault def template VectorFloatDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); -case 0b011: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); +case 0b011: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -625,7 +639,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override { @@ -650,7 +664,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override @@ -693,14 +707,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -737,14 +754,17 @@ Fault VRM_REQUIRED; + %(op_decl)s; + %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + const int64_t vlmul = vtype_vlmul(machInst.vtype8); - const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; [[maybe_unused]] const size_t offset = (this->microIdx % 2 == 0) ? 0 : micro_vlmax; - %(op_decl)s; - %(op_rd)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -757,7 +777,7 @@ Fault def template VectorFloatWideningDecodeBlock {{ switch(machInst.vtype8.vsew) { -case 0b010: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst, vlen); default: GEM5_UNREACHABLE; } @@ -771,7 +791,7 @@ private: int cnt = 0; %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -781,14 +801,14 @@ public: def template ViotaMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -819,7 +839,7 @@ private: bool vm; int* cnt; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, int* cnt); Fault execute(ExecContext* xc, trace::InstRecord* 
traceData)const override; using %(base_class)s::generateDisassembly; @@ -831,7 +851,7 @@ def template ViotaMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, int* cnt) + uint32_t _microVl, uint8_t _microIdx, int* cnt) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -871,6 +891,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -919,6 +940,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -997,7 +1019,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1006,14 +1028,14 @@ public: def template VectorIntMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1028,7 +1050,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), _vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1050,7 +1072,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* 
xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1061,7 +1083,7 @@ def template VectorIntMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1099,10 +1121,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1119,7 +1142,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1128,14 +1151,14 @@ public: def template VectorFloatMaskMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1150,7 +1173,7 @@ template micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); } microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, - this->microops.size()); + this->microops.size(), _vlen); this->microops.push_back(microop); this->microops.front()->setFirstMicroop(); @@ -1171,7 +1194,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t 
_microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1182,7 +1205,7 @@ def template VectorFloatMaskMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1220,10 +1243,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(vm_decl_rd)s; %(copy_old_vd)s; - constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t bit_offset = vlenb / sizeof(ElemType); const uint16_t offset = bit_offset * microIdx; %(code)s; @@ -1276,7 +1300,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1287,7 +1311,7 @@ public: def template VMvWholeMicroConstructor {{ %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1321,7 +1345,8 @@ Fault %(op_decl)s; %(op_rd)s; - for (size_t i = 0; i < (VLEN / 64); i++) { + %(set_vlen)s; + for (size_t i = 0; i < (vlen / 64); i++) { %(code)s; } %(op_wb)s; @@ -1382,6 +1407,7 @@ Fault %(op_decl)s; %(op_rd)s; // TODO: remove it + %(set_vlenb)s; %(copy_old_vd)s; %(code)s; %(op_wb)s; @@ -1489,6 +1515,28 @@ Fault }}; +def template VectorFloatNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntNonSplitDecodeBlock {{ + +switch(machInst.vtype8.vsew) { 
+case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + def template VectorReduceMacroDeclare {{ template @@ -1496,7 +1544,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1505,14 +1553,14 @@ public: def template VectorReduceMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1544,7 +1592,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1555,7 +1603,7 @@ def template VectorReduceMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1593,6 +1641,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1600,7 +1650,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* 
vs2) { ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { microop_result = f(microop_result, Vs2[i]); } @@ -1625,6 +1676,7 @@ Fault %(type_def)s; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -1638,6 +1690,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1647,7 +1701,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1685,6 +1740,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -1694,7 +1751,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]).v; } @@ -1716,7 +1774,7 @@ class %(class_name)s : public %(base_class)s{ private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1725,8 +1783,9 @@ public: def template VectorGatherMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) 
+%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1740,7 +1799,8 @@ template const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul; const uint8_t vd_vregs = vs2_vregs; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs1_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1778,7 +1838,7 @@ private: bool vm; public: %(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx); + uint32_t _microVl, uint8_t _microIdx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; }; @@ -1789,7 +1849,7 @@ def template VectorGatherMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx) + uint32_t _microVl, uint8_t _microIdx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -1839,17 +1899,19 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; - const uint32_t vlmax = vtype_VLMAX(vtype); + const uint32_t vlmax = vtype_VLMAX(vtype,vlen); constexpr uint8_t vd_eewb = sizeof(ElemType); constexpr uint8_t vs1_eewb = sizeof(IndexType); constexpr uint8_t vs2_eewb = sizeof(ElemType); constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; - [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; - [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; - [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const uint16_t vd_elems = vlenb / vd_eewb; + [[maybe_unused]] const 
uint16_t vs1_elems = vlenb / vs1_eewb; + [[maybe_unused]] const uint16_t vs2_elems = vlenb / vs2_eewb; [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; @@ -1875,19 +1937,19 @@ def template VectorGatherDecodeBlock {{ switch(machInst.vtype8.vsew) { case 0b000: { using elem_type [[maybe_unused]] = uint8_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b001: { using elem_type [[maybe_unused]] = uint16_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b010: { using elem_type [[maybe_unused]] = uint32_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } case 0b011: { using elem_type [[maybe_unused]] = uint64_t; - return new %(class_name)s(machInst); + return new %(class_name)s(machInst, vlen); } default: GEM5_UNREACHABLE; } @@ -1902,7 +1964,7 @@ private: %(reg_idx_arr_decl)s; bool vxsat = false; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -1911,14 +1973,14 @@ public: def template VectorIntVxsatMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -1954,7 +2016,7 @@ private: bool vm; bool* vxsatptr; public: - %(class_name)s(ExtMachInst 
_machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -1966,7 +2028,7 @@ def template VectorIntVxsatMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + uint32_t _microVl, uint8_t _microIdx, bool* vxsatptr) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx) { @@ -2007,6 +2069,8 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(vm_decl_rd)s; %(copy_old_vd)s; @@ -2016,7 +2080,8 @@ Fault [&, this](const auto& f, const auto* _, const auto* vs2) { vwu tmp_val = Vd[0]; for (uint32_t i = 0; i < this->microVl; i++) { - uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * + this->microIdx; if (this->vm || elem_mask(v0, ei)) { tmp_val = f(tmp_val, Vs2[i]); } @@ -2038,7 +2103,7 @@ class %(class_name)s : public %(base_class)s { private: %(reg_idx_arr_decl)s; public: - %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -2047,14 +2112,14 @@ public: def template VectorSlideUpMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2082,14 +2147,14 @@ 
template def template VectorSlideDownMacroConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; const uint32_t num_microops = vtype_regs_per_group(vtype); int32_t tmp_vl = this->vl; - const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, vlen, true); int32_t micro_vl = std::min(tmp_vl, micro_vlmax); StaticInstPtr microop; @@ -2126,7 +2191,7 @@ private: RegId destRegIdxArr[1]; bool vm; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx); Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; using %(base_class)s::generateDisassembly; @@ -2138,7 +2203,7 @@ def template VectorSlideMicroConstructor {{ template %(class_name)s::%(class_name)s(ExtMachInst _machInst, - uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + uint32_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, _microIdx, _vdIdx, _vs2Idx) { @@ -2174,10 +2239,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype, vlen); + %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; @@ -2210,10 +2278,13 @@ Fault status.vs = VPUStatus::DIRTY; xc->setMiscReg(MISCREG_STATUS, status); - [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); - %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; + + [[maybe_unused]]const uint32_t vlmax = 
vtype_VLMAX(vtype, vlen); + %(vm_decl_rd)s; %(copy_old_vd)s; %(code)s; diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index 2b3b9187bf..8cbab044ec 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -34,6 +34,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -48,6 +49,7 @@ private: %(reg_idx_arr_decl)s; public: %(class_name)s(ExtMachInst _machInst); + %(class_name)s(ExtMachInst _machInst, uint32_t _vlen); using %(base_class)s::generateDisassembly; }; @@ -55,16 +57,17 @@ public: def template VleConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; if (micro_vl == 0) { @@ -72,7 +75,7 @@ def template VleConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -93,9 +96,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, - _microIdx) 
+ %(class_name)s(ExtMachInst _machInst, uint32_t _microVl, + uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -127,12 +131,15 @@ Fault Addr EA; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; RiscvISA::vreg_t tmp_v0; uint8_t *v0; MISA misa = xc->readMiscReg(MISCREG_ISA); STATUS status = xc->readMiscReg(MISCREG_STATUS); + if (!misa.rvv || status.vs == VPUStatus::OFF) { return std::make_shared( "RVV is disabled or VPU is off", machInst); @@ -150,15 +157,18 @@ Fault } uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); if (fault != NoFault) return fault; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; %(memacc_code)s; @@ -176,10 +186,12 @@ Fault %(class_name)s::initiateAcc(ExecContext* xc, trace::InstRecord* traceData) const { + Addr EA; %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; MISA misa = xc->readMiscReg(MISCREG_ISA); @@ -192,6 +204,7 @@ Fault return std::make_shared("VILL is set", machInst); uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -208,6 +221,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -222,8 +236,9 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); - 
const size_t micro_elems = VLEN / width_EEW(machInst.width); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); + const size_t micro_elems = vlen / width_EEW(machInst.width); + size_t ei; for (size_t i = 0; i < micro_elems; i++) { ei = i + micro_vlmax * microIdx; @@ -238,13 +253,13 @@ Fault def template VseConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); @@ -256,7 +271,7 @@ def template VseConstructor {{ this->microops.push_back(microop); } for (int i = 0; i < num_microops && micro_vl > 0; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop = new %(class_name)sMicro(_machInst, micro_vl, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -277,9 +292,10 @@ private: RegId srcRegIdxArr[3]; RegId destRegIdxArr[0]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -326,9 +342,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const 
size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -375,9 +393,11 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; - const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, vlen, true); const size_t eewb = width_EEW(machInst.width) / 8; const size_t mem_size = eewb * microVl; std::vector byte_enable(mem_size, false); @@ -412,20 +432,20 @@ Fault def template VlmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop = new Vle8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); } @@ -439,20 +459,20 @@ def template VlmConstructor {{ def template VsmConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t micro_vlmax = vlen / width_EEW(_machInst.width); int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; StaticInstPtr microop; if (micro_vl == 0) { microop = new VectorNopMicroInst(_machInst); } else { - microop = new 
Vse8_vMicro(_machInst, micro_vl, 0); + microop = new Vse8_vMicro(_machInst, micro_vl, 0, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); } @@ -466,18 +486,18 @@ def template VsmConstructor {{ def template VsWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsStore); this->microops.push_back(microop); @@ -497,9 +517,10 @@ private: RegId destRegIdxArr[0]; RegId srcRegIdxArr[2]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -532,14 +553,16 @@ Fault } %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem), - EA, memAccessFlags, nullptr); + vlenb, EA, memAccessFlags, nullptr); return fault; } @@ -560,14 +583,16 @@ Fault } %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - for (size_t i = 0; i < VLENB; i++) { + + for (size_t i = 0; i < vlenb; i++) { %(memacc_code)s; } Fault fault = 
writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem), - EA, memAccessFlags, nullptr); + EA, vlenb, memAccessFlags, nullptr); return fault; } @@ -586,18 +611,19 @@ Fault def template VlWholeConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; size_t NFIELDS = machInst.nf + 1; - const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + const int32_t micro_vlmax = vlen / width_EEW(_machInst.width); StaticInstPtr microop; for (int i = 0; i < NFIELDS; ++i) { - microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i, vlen); microop->setDelayedCommit(); microop->setFlag(IsLoad); this->microops.push_back(microop); @@ -617,9 +643,10 @@ private: RegId destRegIdxArr[1]; RegId srcRegIdxArr[1]; public: - %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) - : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, - _microVl, _microIdx) + %(class_name)s(ExtMachInst _machInst, + uint32_t _microVl, uint8_t _microIdx, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s_micro", _machInst, + %(op_class)s, _microVl, _microIdx, _vlen) { %(set_reg_idx_arr)s; _numSrcRegs = 0; @@ -657,14 +684,17 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; + %(set_vlen)s; %(ea_code)s; Fault fault = readMemAtomicLE(xc, traceData, EA, - *(vreg_t::Container*)(&Mem), memAccessFlags); + *(vreg_t::Container*)(&Mem), vlenb, + memAccessFlags); if (fault != NoFault) return fault; - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; i++) { %(memacc_code)s; } @@ -690,9 +720,11 @@ Fault } %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; - Fault 
fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + const std::vector byte_enable(vlenb, true); + Fault fault = initiateMemRead(xc, EA, vlenb, memAccessFlags, byte_enable); return fault; } @@ -706,6 +738,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlen)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -713,7 +746,7 @@ Fault memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); - size_t elem_per_reg = VLEN / width_EEW(machInst.width); + size_t elem_per_reg = vlen / width_EEW(machInst.width); for (size_t i = 0; i < elem_per_reg; ++i) { %(memacc_code)s; } @@ -726,13 +759,13 @@ Fault def template VlStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -770,7 +803,7 @@ private: RegId destRegIdxArr[1]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _regIdx, _microIdx, _microVl) { @@ -820,6 +853,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -833,7 +867,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, 
byte_enable); @@ -866,6 +900,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -877,7 +912,7 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -894,6 +929,7 @@ Fault { %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; STATUS status = xc->readMiscReg(MISCREG_STATUS); status.vs = VPUStatus::DIRTY; @@ -920,12 +956,12 @@ Fault memcpy(Vd, old_Vd, microVl * elem_size); // treat vta as vtu // if (machInst.vtype8.vta == 0) - memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + memcpy(Vd + microVl, old_Vd + microVl, vlenb - microVl * elem_size); } else { - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); } - size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + size_t ei = this->regIdx * vlenb / sizeof(Vd[0]) + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -939,13 +975,13 @@ Fault def template VsStrideConstructor {{ -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; - const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + const int32_t num_elems_per_vreg = vlen / width_EEW(_machInst.width); int32_t remaining_vl = this->vl; // Num of elems in one vreg int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); @@ -983,7 +1019,7 @@ 
private: RegId destRegIdxArr[0]; public: %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, - uint8_t _microVl) + uint32_t _microVl) : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, _regIdx, _microIdx, _microVl) { @@ -1025,6 +1061,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; @@ -1038,7 +1075,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1074,11 +1111,13 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); %(ea_code)s; uint32_t mem_size = elem_size; - size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + + size_t ei = this->regIdx * vlenb / elem_size + this->microIdx; bool need_store = machInst.vm || elem_mask(v0, ei); if (need_store) { const std::vector byte_enable(mem_size, need_store); @@ -1105,8 +1144,8 @@ Fault def template VlIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1115,7 +1154,8 @@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; - const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vd_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = 
std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1212,6 +1252,7 @@ Fault %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vd[0]); RiscvISA::vreg_t tmp_v0; @@ -1223,8 +1264,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, byte_enable); @@ -1259,6 +1299,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); %(ea_code)s; // ea_code depends on elem_size @@ -1270,7 +1311,8 @@ Fault } uint32_t mem_size = elem_size; - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; bool need_load = machInst.vm || elem_mask(v0, ei); const std::vector byte_enable(mem_size, need_load); fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); @@ -1293,10 +1335,11 @@ Fault using vu = std::make_unsigned_t; %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; constexpr uint8_t elem_size = sizeof(Vd[0]); - RiscvISA::vreg_t old_vd; + RiscvISA::vreg_t old_vd; decltype(Vd) old_Vd = nullptr; // We treat agnostic as undistrubed xc->getRegOperand(this, 2, &old_vd); @@ -1309,9 +1352,9 @@ Fault v0 = tmp_v0.as(); } - memcpy(Vd, old_Vd, VLENB); + memcpy(Vd, old_Vd, vlenb); - size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + size_t ei = this->vdRegIdx * vlenb / elem_size + this->vdElemIdx; if (machInst.vm || elem_mask(v0, ei)) { memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ @@ -1326,8 +1369,8 @@ Fault def template VsIndexConstructor {{ template -%(class_name)s::%(class_name)s(ExtMachInst _machInst) - :
%(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +%(class_name)s::%(class_name)s(ExtMachInst _machInst, uint32_t _vlen) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _vlen) { %(set_reg_idx_arr)s; %(constructor)s; @@ -1336,7 +1379,8 @@ template const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; - const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + uint32_t vlenb = vlen >> 3; + const int32_t micro_vlmax = vlenb / std::max(vs3_eewb, vs2_eewb); int32_t remaining_vl = this->vl; int32_t micro_vl = std::min(remaining_vl, micro_vlmax); StaticInstPtr microop; @@ -1426,6 +1470,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1438,7 +1483,7 @@ Fault uint32_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1469,6 +1514,7 @@ Fault return std::make_shared("VILL is set", machInst); %(op_src_decl)s; %(op_rd)s; + %(set_vlenb)s; %(ea_code)s; constexpr uint8_t elem_size = sizeof(Vs3[0]); RiscvISA::vreg_t tmp_v0; @@ -1481,7 +1527,7 @@ Fault constexpr uint8_t mem_size = elem_size; const std::vector byte_enable(mem_size, true); - size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + size_t ei = this->vs3RegIdx * vlenb / elem_size + this->vs3ElemIdx; if (machInst.vm || elem_mask(v0, ei)) { %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ fault = xc->writeMem(Mem.as(), mem_size, EA, @@ -1504,6 +1550,10 @@ Fault }}; +def template VMemBaseDecodeBlock {{ + 
return new %(class_name)s(machInst, vlen); +}}; + def template VMemTemplateDecodeBlock {{ switch(machInst.vtype8.vsew) { @@ -1523,3 +1573,23 @@ switch(machInst.vtype8.vsew) { } }}; + +def template VMemSplitTemplateDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s(machInst, vlen); + } + case 0b001: { + return new %(class_name)s(machInst, vlen); + } + case 0b010: { + return new %(class_name)s(machInst, vlen); + } + case 0b011: { + return new %(class_name)s(machInst, vlen); + } + default: GEM5_UNREACHABLE; +} + +}}; diff --git a/src/arch/riscv/pcstate.hh b/src/arch/riscv/pcstate.hh index 918e85708b..91fb507034 100644 --- a/src/arch/riscv/pcstate.hh +++ b/src/arch/riscv/pcstate.hh @@ -62,7 +62,7 @@ class PCState : public GenericISA::UPCState<4> bool _compressed = false; RiscvType _rvType = RV64; - uint64_t _vlenb = VLENB; + uint64_t _vlenb = 32; VTYPE _vtype = (1ULL << 63); // vtype.vill = 1 at initial; uint32_t _vl = 0; @@ -74,7 +74,7 @@ class PCState : public GenericISA::UPCState<4> PCState &operator=(const PCState &other) = default; PCState() = default; explicit PCState(Addr addr) { set(addr); } - explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb = VLENB) + explicit PCState(Addr addr, RiscvType rvType, uint64_t vlenb) { set(addr); _rvType = rvType; diff --git a/src/arch/riscv/regs/vector.hh b/src/arch/riscv/regs/vector.hh index 388e1cb78d..60c840395f 100644 --- a/src/arch/riscv/regs/vector.hh +++ b/src/arch/riscv/regs/vector.hh @@ -36,6 +36,7 @@ #include "arch/generic/vec_pred_reg.hh" #include "arch/generic/vec_reg.hh" +#include "arch/riscv/types.hh" #include "base/bitunion.hh" #include "cpu/reg_class.hh" #include "debug/VecRegs.hh" @@ -46,13 +47,10 @@ namespace gem5 namespace RiscvISA { -constexpr unsigned ELEN = 64; -constexpr unsigned VLEN = 256; -constexpr unsigned VLENB = VLEN / 8; - -using VecRegContainer = gem5::VecRegContainer; +using VecRegContainer = gem5::VecRegContainer; using vreg_t = 
VecRegContainer; + const int NumVecStandardRegs = 32; const int NumVecInternalRegs = 8; // Used by vector uop const int NumVecRegs = NumVecStandardRegs + NumVecInternalRegs; diff --git a/src/arch/riscv/types.hh b/src/arch/riscv/types.hh index 1d501dc05f..c7edffc2f7 100644 --- a/src/arch/riscv/types.hh +++ b/src/arch/riscv/types.hh @@ -42,7 +42,6 @@ #ifndef __ARCH_RISCV_TYPES_HH__ #define __ARCH_RISCV_TYPES_HH__ -#include "arch/riscv/pcstate.hh" #include "base/bitunion.hh" namespace gem5 @@ -178,6 +177,10 @@ BitUnion64(ExtMachInst) EndBitUnion(ExtMachInst) +constexpr unsigned MaxVecLenInBits = 65536; +constexpr unsigned MaxVecLenInBytes = MaxVecLenInBits >> 3; + + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 40054aec0f..bac499e523 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -268,12 +268,13 @@ vtype_SEW(const uint64_t vtype) * Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h */ inline uint64_t -vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +vtype_VLMAX(const uint64_t vtype, const uint64_t vlen, + const bool per_reg = false) { int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); lmul = per_reg ? std::min(0, lmul) : lmul; int64_t vsew = bits(vtype, 5, 3); - return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); + return vlen >> (vsew + 3 - lmul); } inline int64_t diff --git a/util/cpt_upgraders/riscv-dyn-vlen.py b/util/cpt_upgraders/riscv-dyn-vlen.py new file mode 100644 index 0000000000..ea2de9d19d --- /dev/null +++ b/util/cpt_upgraders/riscv-dyn-vlen.py @@ -0,0 +1,49 @@ +# Copyright (c) 2023 Barcelona Supercomputing Center (BSC) +# All rights reserved. 
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+def upgrader(cpt):
+    """
+    Update the checkpoint to support the initial RVV implementation.
+    The updater takes the following steps.
+
+    Set vector registers to occupy 327680 bytes (40regs * 8192bytes).
+    Vector registers now occupy this space regardless of VLEN as the
+    VecRegContainer is always MaxVecLenInBytes.
+    """
+    import re
+
+    for sec in cpt.sections():
+        # Search for all execution-context (XC) sections.
+        if re.search(r".*processor.*\.core.*\.xc.*", sec):
+            # Replace the RVV vector register file with zeros (dummy
+            # values) whenever its serialized size differs from the new
+            # fixed layout of 40 regs * 8192 bytes = 327680 bytes.
+            mr = cpt.get(sec, "regs.vector").split()
+            if len(mr) != 327680:
+                cpt.set(
+                    sec, "regs.vector", " ".join("0" for _ in range(327680))
+                )