diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index f2bde629e9..a1ccf402c9 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -122,6 +122,93 @@ VConfOp::generateZimmDisassembly() const return s.str(); } +std::string /* Builds the text "<mnemonic> <dest reg 0>, <src reg 0>" and appends ", v0.t" when machInst.vm == 0; pc and symtab are not read. */ +VectorNonSplitInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorArithMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ /* Builds "<mnemonic> <dest reg 0>, " followed by "<src reg 0>, <vecimm>" when machInst.funct3 == 0x3, otherwise "<src reg 1>, <src reg 0>"; ", v0.t" is appended when machInst.vm == 0. pc and symtab are not read. */ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + // OPIVI + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorArithMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ /* Macro-op counterpart: the statements below are identical to VectorArithMicroInst::generateDisassembly (immediate form when funct3 == 0x3, two-register form otherwise, optional ", v0.t" suffix). */ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + // OPIVI + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorVMUNARY0MicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ /* Prints only the destination: "<mnemonic> <dest reg 0>", plus ", v0.t" when machInst.vm == 0; no source operand is printed. */ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorVMUNARY0MacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ /* Macro-op counterpart: same statements as VectorVMUNARY0MicroInst::generateDisassembly (destination register and optional ", v0.t" suffix only). */ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + 
+std::string VectorSlideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorSlideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + std::string VleMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { @@ -295,5 +382,25 @@ std::string VsIndexMicroInst::generateDisassembly(Addr pc, return ss.str(); } +std::string +VMvWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + registerName(srcRegIdx(1)); + return ss.str(); +} + +std::string +VMvWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + registerName(srcRegIdx(1)); + return ss.str(); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index f989d7ffbf..5d0874a994 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -89,6 +89,24 @@ inline uint8_t checked_vtype(bool vill, uint8_t vtype) { return vtype; } +class VectorNonSplitInst : public RiscvStaticInst +{ + protected: + uint32_t vl; + 
uint8_t vtype; + VectorNonSplitInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : RiscvStaticInst(mnem, _machInst, __opClass), + vl(_machInst.vl), + vtype(checked_vtype(_machInst.vill, _machInst.vtype8)) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + class VectorMacroInst : public RiscvMacroInst { protected: @@ -170,6 +188,63 @@ class VectorArithMacroInst : public VectorMacroInst Addr pc, const loader::SymbolTable *symtab) const override; }; +class VectorVMUNARY0MicroInst : public VectorMicroInst +{ +protected: + VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorVMUNARY0MacroInst : public VectorMacroInst +{ + protected: + VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorSlideMacroInst : public VectorMacroInst +{ + protected: + VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorSlideMicroInst : public VectorMicroInst +{ + protected: + uint8_t vdIdx; + uint8_t vs2Idx; + VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + , vdIdx(_vdIdx), 
vs2Idx(_vs2Idx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + class VectorMemMicroInst : public VectorMicroInst { protected: @@ -421,6 +496,131 @@ class VsIndexMicroInst : public VectorMemMicroInst Addr pc, const loader::SymbolTable *symtab) const override; }; +class VMvWholeMacroInst : public VectorArithMacroInst +{ + protected: + VMvWholeMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorArithMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VMvWholeMicroInst : public VectorArithMicroInst +{ + protected: + VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +template +class VMaskMergeMicroInst : public VectorArithMicroInst +{ + private: + RegId srcRegIdxArr[NumVecInternalRegs]; + RegId destRegIdxArr[1]; + + public: + VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, + uint8_t _numSrcs) + : VectorArithMicroInst("vmask_mv_micro", extMachInst, + VectorIntegerArithOp, 0, 0) + { + setRegIdxArrays( + reinterpret_cast( + &std::remove_pointer_t::srcRegIdxArr), + reinterpret_cast( + &std::remove_pointer_t::destRegIdxArr)); + + _numSrcRegs = 0; + _numDestRegs = 0; + + setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]); + _numTypedDestRegs[VecRegClass]++; + for (uint8_t i=0; i<_numSrcs; i++) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]); + } + } + + Fault execute(ExecContext* xc, trace::InstRecord* traceData) + const override { + vreg_t tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + auto Vd = tmp_d0.as(); + constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + size_t bit_cnt = 
elems_per_vreg; + vreg_t tmp_s; + xc->getRegOperand(this, 0, &tmp_s); + auto s = tmp_s.as(); + // cp the first result and tail + memcpy(Vd, s, VLENB); + for (uint8_t i = 1; i < this->_numSrcRegs; i++) { + xc->getRegOperand(this, i, &tmp_s); + s = tmp_s.as(); + if constexpr (elems_per_vreg < 8) { + constexpr uint8_t m = (1 << elems_per_vreg) - 1; + const uint8_t mask = m << (i * elems_per_vreg % 8); + // clr & ext bits + Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; + Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; + bit_cnt += elems_per_vreg; + } else { + constexpr uint8_t byte_offset = elems_per_vreg / 8; + memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); + } + } + xc->setRegOperand(this, 0, &tmp_d0); + if (traceData) + traceData->setData(vecRegClass, &tmp_d0); + return NoFault; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + for (uint8_t i = 0; i < this->_numSrcRegs; i++) { + ss << ", " << registerName(srcRegIdx(i)); + } + ss << ", offset:" << VLENB / sizeof(ElemType); + return ss.str(); + } +}; + +class VxsatMicroInst : public VectorArithMicroInst +{ + private: + bool* vxsat; + public: + VxsatMicroInst(bool* Vxsat, ExtMachInst extMachInst) + : VectorArithMicroInst("vxsat_micro", extMachInst, + VectorIntegerArithOp, 0, 0) + { + vxsat = Vxsat; + } + Fault execute(ExecContext* xc, trace::InstRecord* traceData) + const override + { + xc->setMiscReg(MISCREG_VXSAT,*vxsat); + auto vcsr = xc->readMiscReg(MISCREG_VCSR); + xc->setMiscReg(MISCREG_VCSR, ((vcsr&~1)|*vxsat)); + return NoFault; + } + std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { + std::stringstream ss; + ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? 
"0x1" : "0x0"); + return ss.str(); + } +}; } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 0288f37ad8..2b46752ffe 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -2281,6 +2281,2060 @@ decode QUADRANT default Unknown::unknown() { } 0x15: decode FUNCT3 { + // OPIVV + 0x0: decode VFUNCT6 { + format VectorIntFormat { + 0x0: vadd_vv({{ + Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x2: vsub_vv({{ + Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x4: vminu_vv({{ + Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ? + Vs2_vu[i] : Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x5: vmin_vv({{ + Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ? + Vs2_vi[i] : Vs1_vi[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x6: vmaxu_vv({{ + Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ? + Vs2_vu[i] : Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x7: vmax_vv({{ + Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ? + Vs2_vi[i] : Vs1_vi[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x9: vand_vv({{ + Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0xa: vor_vv({{ + Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0xb: vxor_vv({{ + Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + } + 0x0c: VectorGatherFormat::vrgather_vv({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = Vs1_vu[i] + - vs2_elems * vs2_idx; + auto res = (Vs1_vu[i] >= vlmax) ? 0 + : (idx < vs2_elems) ? 
Vs2_vu[idx] + : Vs3_vu[i]; + Vd_vu[i] = res; + } + } + }}, OPIVV, VectorMiscOp); + 0x0e: VectorGatherFormat::vrgatherei16_vv({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint16_t idx = Vs1_uh[i + vs1_bias] + - vs2_elems * vs2_idx; + auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i + vd_bias]; + Vd_vu[i + vd_bias] = res; + } + } + }}, OPIVV, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vvm({{ + Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i] + + elem_mask(v0, ei); + }}, OPIVV, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x12: decode VM { + 0x0: vsbc_vvm({{ + Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i] + - elem_mask(v0, ei); + }}, OPIVV, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vvm({{ + Vd_vu[i] = elem_mask(v0, ei) + ? Vs1_vu[i] + : Vs2_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x1: decode VS2 { + 0x0: vmv_v_v({{ + Vd_vu[i] = Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + } + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vv({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x21: vsadd_vv({{ + Vd_vu[i] = sat_add(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x22: vssubu_vv({{ + Vd_vu[i] = sat_subu(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x23: vssub_vv({{ + Vd_vu[i] = sat_sub(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x27: vsmul_vv({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + bool overflow = Vs1_vi[i] == Vs2_vi[i] && + Vs1_vi[i] == min; + __int128_t result = (__int128_t)Vs1_vi[i] * + (__int128_t)Vs2_vi[i]; + result = int_rounding<__int128_t>( + result, 0 /* TODO */, sew - 1); + result = result >> (sew - 1); + if (overflow) { + result = max; 
+ *vxsatptr = true; + } + + Vd_vi[i] = (vi)result; + }}, OPIVV, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vv({{ + Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x28: vsrl_vv({{ + Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x29: vsra_vv({{ + Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x2a: vssrl_vv({{ + int sh = Vs1_vu[i] & (sew - 1); + __uint128_t val = Vs2_vu[i]; + + val = int_rounding<__uint128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vu[i] = val >> sh; + }}, OPIVV, VectorIntegerArithOp); + 0x2b: vssra_vv({{ + int sh = Vs1_vi[i] & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVV, VectorIntegerArithOp); + } + format VectorReduceIntWideningFormat { + 0x30: vwredsumu_vs({{ + Vd_vwu[0] = reduce_loop(std::plus(), + Vs1_vwu, Vs2_vu); + }}, OPIVV, VectorIntegerReduceOp); + 0x31: vwredsum_vs({{ + Vd_vwu[0] = reduce_loop(std::plus(), + Vs1_vwi, Vs2_vi); + }}, OPIVV, VectorIntegerReduceOp); + } + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vvm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vu[i], Vs1_vu[i], + elem_mask(v0, ei))); + }}, OPIVV, VectorIntegerArithOp); + 0x1: vmadc_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vu[i], Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + } + 0x13: decode VM { + 0x0: vmsbc_vvm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Vs1_vi[i], + elem_mask(v0, ei))); + }}, OPIVV, VectorIntegerArithOp); + 0x1: vmsbc_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + } + 0x18: vmseq_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] == Vs1_vu[i])); + }}, OPIVV, 
VectorIntegerArithOp); + 0x19: vmsne_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] != Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1a: vmsltu_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] < Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1b: vmslt_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] < Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1c: vmsleu_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1d: vmsle_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wv({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1))); + }}, OPIVV, VectorIntegerArithOp); + 0x2d: vnsra_wv({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1))); + }}, OPIVV, VectorIntegerArithOp); + 0x2e: vnclipu_wv({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + res = max; + // TODO: vxsat + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVV, VectorIntegerArithOp); + 0x2f: vnclip_wv({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVV, VectorIntegerArithOp); + } + } + // OPFVV + 0x1: decode VFUNCT6 { + 0x00: VectorFloatFormat::vfadd_vv({{ + auto fd = 
fadd(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x01: VectorReduceFloatFormat::vfredusum_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fadd(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x02: VectorFloatFormat::vfsub_vv({{ + auto fd = fsub(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x03: VectorReduceFloatFormat::vfredosum_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fadd(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x04: VectorFloatFormat::vfmin_vv({{ + auto fd = fmin(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x05: VectorReduceFloatFormat::vfredmin_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fmin(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x06: VectorFloatFormat::vfmax_vv({{ + auto fd = fmax(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x07: VectorReduceFloatFormat::vfredmax_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fmax(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x08: VectorFloatFormat::vfsgnj_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + false, false).v; + }}, OPFVV, VectorFloatArithOp); + 0x09: VectorFloatFormat::vfsgnjn_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + true, false).v; + }}, OPFVV, VectorFloatArithOp); + 0x0a: VectorFloatFormat::vfsgnjx_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + false, true).v; + }}, OPFVV, VectorFloatArithOp); + // VWFUNARY0 + 0x10: decode VS1 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vfmv.f.s are reserved + 0x1: 
VectorNonSplitFormat::vfmv_f_s({{ + freg_t fd = freg(Vs2_vu[0]); + Fd_bits = fd.v; + }}, OPFVV, VectorMiscOp); + } + } + 0x12: decode VS1 { + format VectorFloatCvtFormat { + 0x00: vfcvt_xu_f_v({{ + Vd_vu[i] = f_to_ui(ftype(Vs2_vu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x01: vfcvt_x_f_v({{ + Vd_vu[i] = f_to_i(ftype(Vs2_vu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x02: vfcvt_f_xu_v({{ + auto fd = ui_to_f(Vs2_vu[i]); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x03: vfcvt_f_x_v({{ + auto fd = i_to_f(Vs2_vu[i]); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x06: vfcvt_rtz_xu_f_v({{ + Vd_vu[i] = f_to_ui(ftype(Vs2_vu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x07: vfcvt_rtz_x_f_v({{ + Vd_vu[i] = f_to_i(ftype(Vs2_vu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + format VectorFloatWideningCvtFormat { + 0x08: vfwcvt_xu_f_v({{ + Vd_vwu[i] = f_to_wui( + ftype(Vs2_vu[i + offset]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x09: vfwcvt_x_f_v({{ + Vd_vwu[i] = f_to_wi( + ftype(Vs2_vu[i + offset]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x0a: vfwcvt_f_xu_v({{ + auto fd = ui_to_wf(Vs2_vu[i + offset]); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0b: vfwcvt_f_x_v({{ + auto fd = i_to_wf(Vs2_vu[i + offset]); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0c: vfwcvt_f_f_v({{ + auto fd = f_to_wf( + ftype(Vs2_vu[i + offset])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0e: vfwcvt_rtz_xu_f_v({{ + Vd_vwu[i] = f_to_wui( + ftype(Vs2_vu[i + offset]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x0f: vfwcvt_rtz_x_f_v({{ + Vd_vwu[i] = f_to_wi( + ftype(Vs2_vu[i + offset]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + format VectorFloatNarrowingCvtFormat { + 0x10: vfncvt_xu_f_w({{ + Vd_vu[i + offset] = f_to_nui( + 
ftype(Vs2_vwu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x11: vfncvt_x_f_w({{ + Vd_vu[i + offset] = f_to_ni( + ftype(Vs2_vwu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x12: vfncvt_f_xu_w({{ + auto fd = ui_to_nf(Vs2_vwu[i]); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x13: vfncvt_f_x_w({{ + auto fd = i_to_nf(Vs2_vwu[i]); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x14: vfncvt_f_f_w({{ + auto fd = f_to_nf(ftype(Vs2_vwu[i])); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x15: vfncvt_rod_f_f_w({{ + softfloat_roundingMode = softfloat_round_odd; + auto fd = f_to_nf(ftype(Vs2_vwu[i])); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x16: vfncvt_rtz_xu_f_w({{ + Vd_vu[i + offset] = f_to_nui( + ftype(Vs2_vwu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x17: vfncvt_rtz_x_f_w({{ + Vd_vu[i + offset] = f_to_ni( + ftype(Vs2_vwu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + } + 0x13: decode VS1 { + format VectorFloatCvtFormat { + 0x00: vfsqrt_v({{ + auto fd = fsqrt(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x04: vfrsqrt7_v({{ + auto fd = frsqrte7(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x05: vfrec7_v({{ + auto fd = frecip7(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x10: vfclass_v({{ + auto fd = fclassify(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + } + } + + format VectorFloatMaskFormat { + 0x18: vmfeq_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + feq(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x19: vmfle_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x1b: vmflt_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + 
flt(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x1c: vmfne_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + !feq(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + } + format VectorFloatFormat { + 0x20: vfdiv_vv({{ + auto fd = fdiv(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x24: vfmul_vv({{ + auto fd = fmul(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x28: vfmadd_vv({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype(Vs1_vu[i]), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x29: vfnmadd_vv({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype(Vs1_vu[i]), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2a: vfmsub_vv({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype(Vs1_vu[i]), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2b: vfnmsub_vv({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype(Vs1_vu[i]), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2c: vfmacc_vv({{ + auto fd = fmadd(ftype(Vs1_vu[i]), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2d: vfnmacc_vv({{ + auto fd = fmadd(fneg(ftype(Vs1_vu[i])), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2e: vfmsac_vv({{ + auto fd = fmadd(ftype(Vs1_vu[i]), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2f: vfnmsac_vv({{ + auto fd = fmadd(fneg(ftype(Vs1_vu[i])), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{ + Vd_vwu[0] = reduce_loop( + [](const vwu& src1, const vu& src2) { + return fadd( + ftype(src1), + f_to_wf(ftype(src2)) + ); + }, Vs1_vwu, Vs2_vu); + }}, 
OPFVV, VectorFloatReduceOp); + 0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{ + Vd_vwu[0] = reduce_loop( + [](const vwu& src1, const vu& src2) { + return fadd( + ftype(src1), + f_to_wf(ftype(src2)) + ); + }, Vs1_vwu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + } + format VectorFloatWideningFormat { + 0x30: vfwadd_vv({{ + auto fd = fadd( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x32: vfwsub_vv({{ + auto fd = fsub( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x34: vfwadd_wv({{ + auto fd = fadd( + ftype(Vs2_vwu[i]), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x36: vfwsub_wv({{ + auto fd = fsub( + ftype(Vs2_vwu[i]), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x38: vfwmul_vv({{ + auto fd = fmul( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3c: vfwmacc_vv({{ + auto fd = fmadd( + fwiden(ftype(Vs1_vu[i + offset])), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3d: vfwnmacc_vv({{ + auto fd = fmadd( + fwiden(fneg(ftype(Vs1_vu[i + offset]))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3e: vfwmsac_vv({{ + auto fd = fmadd( + fwiden(ftype(Vs1_vu[i + offset])), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3f: vfwnmsac_vv({{ + auto fd = fmadd( + fwiden(fneg(ftype(Vs1_vu[i + offset]))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + } + } + // OPMVV + 0x2: decode VFUNCT6 { + format VectorReduceIntFormat { + 0x0: 
vredsum_vs({{ + Vd_vi[0] = + reduce_loop(std::plus(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x1: vredand_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_and(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x2: vredor_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_or(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x3: vredxor_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_xor(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x4: vredminu_vs({{ + Vd_vu[0] = + reduce_loop([](const vu& src1, const vu& src2) { + return std::min(src1, src2); + }, Vs1_vu, Vs2_vu); + }}, OPMVV, VectorIntegerReduceOp); + 0x5: vredmin_vs({{ + Vd_vi[0] = + reduce_loop([](const vi& src1, const vi& src2) { + return std::min(src1, src2); + }, Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x6: vredmaxu_vs({{ + Vd_vu[0] = + reduce_loop([](const vu& src1, const vu& src2) { + return std::max(src1, src2); + }, Vs1_vu, Vs2_vu); + }}, OPMVV, VectorIntegerReduceOp); + 0x7: vredmax_vs({{ + Vd_vi[0] = + reduce_loop([](const vi& src1, const vi& src2) { + return std::max(src1, src2); + }, Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + } + format VectorIntFormat { + 0x8: vaaddu_vv({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0x9: vaadd_vv({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0xa: vasubu_vv({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0xb: vasub_vv({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + } + // VWXUNARY0 + 0x10: decode VS1 { 
+ 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vmv.x.s are reserved. + 0x1: VectorNonSplitFormat::vmv_x_s({{ + Rd_ud = Vs2_vi[0]; + }}, OPMVV, VectorMiscOp); + } + 0x10: Vector1Vs1RdMaskFormat::vcpop_m({{ + uint64_t popcount = 0; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + if(this->vm){ + popcount += vs2_lsb; + }else{ + bool do_mask = elem_mask(v0, i); + popcount += (vs2_lsb && do_mask); + } + } + Rd_vu = popcount; + }}, OPMVV, VectorMiscOp); + 0x11: Vector1Vs1RdMaskFormat::vfirst_m({{ + int64_t pos = -1; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + if(this->vm == 0){ + if(elem_mask(v0, i)==0){ + continue; + } + } + bool vs2_lsb = elem_mask(Vs2_vu, i); + if (vs2_lsb) { + pos = i; + break; + } + } + Rd_vu = pos; + }}, OPMVV, VectorMiscOp); + } + 0x12: decode VS1 { + format VectorIntExtFormat { + 0x02: vzext_vf8({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x03: vsext_vf8({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x04: vzext_vf4({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x05: vsext_vf4({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x06: vzext_vf2({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x07: vsext_vf2({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + } + } + 0x14: decode VS1 { + 0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x02: Vector1Vs1VdMaskFormat::vmsof_m({{ 
+ bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x03: Vector1Vs1VdMaskFormat::vmsif_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x10: ViotaFormat::viota_m({{ + RiscvISAInst::VecRegContainer tmp_s2; + xc->getRegOperand(this, 2, + &tmp_s2); + auto Vs2bit = tmp_s2.as(); + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + + vtype_VLMAX(vtype, true) * this->microIdx; + bool vs2_lsb = elem_mask(Vs2bit, ei); + bool do_mask = elem_mask(v0, ei); + bool has_one = false; + if (this->vm || (do_mask && !this->vm)) { + if (vs2_lsb) { + has_one = true; + } + } + bool use_ori = (!this->vm) && !do_mask; + if(use_ori == false){ + Vd_vu[i] = *cnt; + } + if (has_one) { + *cnt = *cnt+1; + } + } + }}, OPMVV, VectorMiscOp); + 0x11: VectorIntFormat::vid_v({{ + Vd_vu[i] = ei; + }}, OPMVV, VectorMiscOp); + } + format VectorMaskFormat { + 0x18: vmandn_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x19: vmand_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1a: vmor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1b: vmxor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)); + }}, 
OPMVV, VectorMiscOp); + 0x1c: vmorn_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1d: vmnand_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + 0x1e: vmnor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + 0x1f: vmxnor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + } + format VectorIntFormat { + 0x20: vdivu_vv({{ + if (Vs1_vu[i] == 0) + Vd_vu[i] = (vu)-1; + else + Vd_vu[i] = Vs2_vu[i] / Vs1_vu[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x21: vdiv_vv({{ + if (Vs1_vi[i] == 0) + Vd_vi[i] = -1; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Vs1_vi[i] == -1) + Vd_vi[i] = Vs2_vi[i]; + else + Vd_vi[i] = Vs2_vi[i] / Vs1_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x22: vremu_vv({{ + if (Vs1_vu[i] == 0) { + Vd_vu[i] = Vs2_vu[i]; + } else { + Vd_vu[i] = Vs2_vu[i] % Vs1_vu[i]; + } + }}, OPMVV, VectorIntegerArithOp); + 0x23: vrem_vv({{ + if (Vs1_vi[i] == 0) { + Vd_vi[i] = Vs2_vi[i]; + } else if (Vs2_vi[i] == std::numeric_limits::min() + && Vs1_vi[i] == -1) { + Vd_vi[i] = 0; + } else { + Vd_vi[i] = Vs2_vi[i] % Vs1_vi[i]; + } + }}, OPMVV, VectorIntegerArithOp); + 0x24: vmulhu_vv({{ + if (sew < 64) { + Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Vs1_vu[i]) + >> sew; + } else { + Vd_vu[i] = mulhu_64(Vs2_vu[i], Vs1_vu[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x25: vmul_vv({{ + Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x26: vmulhsu_vv({{ + if (sew < 64) { + Vd_vi[i] = ((int64_t)Vs2_vi[i] * + (uint64_t)Vs1_vu[i]) + >> sew; + } else { + Vd_vi[i] = mulhsu_64(Vs2_vi[i], Vs1_vu[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x27: vmulh_vv({{ + if (sew < 64) { + Vd_vi[i] = ((int64_t)Vs2_vi[i] * Vs1_vi[i]) + >> sew; + } else { + Vd_vi[i] = mulh_64(Vs2_vi[i], Vs1_vi[i]); + } + 
}}, OPMVV, VectorIntegerArithOp); + 0x29: vmadd_vv({{ + Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2b: vnmsub_vv({{ + Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2d: vmacc_vv({{ + Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2f: vnmsac_vv({{ + Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + } + format VectorIntWideningFormat { + 0x30: vwaddu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + + vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x31: vwadd_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + + vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x32: vwsubu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + - vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x33: vwsub_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + - vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x34: vwaddu_wv({{ + Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x35: vwadd_wv({{ + Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x36: vwsubu_wv({{ + Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x37: vwsub_wv({{ + Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x38: vwmulu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + * vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3a: vwmulsu_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + * vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3b: vwmul_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + * vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3c: vwmaccu_vv({{ + Vd_vwu[i] = vwu(Vs1_vu[i + offset]) + * vwu(Vs2_vu[i + offset]) + + Vs3_vwu[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x3d: vwmacc_vv({{ + Vd_vwi[i] = vwi(Vs1_vi[i + offset]) + * 
vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x3f: vwmaccsu_vv({{ + Vd_vwi[i] = vwi(Vs1_vi[i + offset]) + * vwu(Vs2_vu[i + offset]) + + Vs3_vwi[i]; + }}, OPMVV, VectorIntegerArithOp); + } + } + // OPIVI + 0x3: decode VFUNCT6 { + format VectorIntFormat { + 0x00: vadd_vi({{ + Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x03: vrsub_vi({{ + Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i]; + }}, OPIVI, VectorIntegerArithOp); + 0x09: vand_vi({{ + Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x0a: vor_vi({{ + Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x0b: vxor_vi({{ + Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + } + 0x0c: VectorGatherFormat::vrgather_vi({{ + /* The vrgather.vi index is an unsigned 5-bit immediate, so the */ + /* raw SIMM5 field is zero-extended instead of going through */ + /* sext<5>; sign extension would push encodings 16..31 past the */ + /* vlmax test below and force a 0 result. */ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = + (uint64_t)SIMM5 - vs2_elems * vs2_idx; + Vd_vu[i] = ((uint64_t)SIMM5 >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + } + } + }}, OPIVI, VectorMiscOp); + 0x0e: VectorSlideUpFormat::vslideup_vi({{ + const int offset = (int)(uint64_t)(SIMM5); + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 
0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + } + }}, OPIVI, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslidedown_vi({{ + const int offset = (int)(uint64_t)(SIMM5); + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = res[i]; + } + } + } + }}, OPIVI, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vim({{ + Vd_vi[i] = Vs2_vi[i] + + (vi)sext<5>(SIMM5) + elem_mask(v0, ei); + }}, OPIVI, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vim({{ + Vd_vi[i] = elem_mask(v0, ei) + ? 
(vi)sext<5>(SIMM5) + : Vs2_vi[i]; + }}, OPIVI, VectorIntegerArithOp); + 0x1: vmv_v_i({{ + Vd_vi[i] = (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + } + } + format VectorIntVxsatFormat{ + /* The immediate is sign-extended before the saturating add, */ + /* matching the sext<5>(SIMM5) use in the other OPIVI forms. */ + 0x20: vsaddu_vi({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], (vu)sext<5>(SIMM5), + vxsatptr); + }}, OPIVI, VectorIntegerArithOp); + 0x21: vsadd_vi({{ + Vd_vu[i] = sat_add(Vs2_vu[i], (vu)sext<5>(SIMM5), + vxsatptr); + }}, OPIVI, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vi({{ + Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x28: vsrl_vi({{ + Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x2a: vssrl_vi({{ + /* Scaling shift right: the rounding mode is read from */ + /* MISCREG_VXRM, the same source vssra_vi uses. */ + int sh = SIMM5 & (vtype_SEW(vtype) - 1); + __uint128_t res = Vs2_vu[i]; + + res = int_rounding<__uint128_t>( + res, xc->readMiscReg(MISCREG_VXRM), sh) >> sh; + + Vd_vu[i] = res; + }}, OPIVI, VectorIntegerArithOp); + 0x29: vsra_vi({{ + Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x2b: vssra_vi({{ + int sh = SIMM5 & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVI, VectorIntegerArithOp); + } + // According to Spec Section 16.6, + // vm must be 1 (unmasked) in vmvr.v instructions. 
+ 0x27: decode VM { 0x1: decode SIMM3 { + format VMvWholeFormat { + 0x0: vmv1r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x1: vmv2r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x3: vmv4r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x7: vmv8r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + } + }} + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vim({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5), + elem_mask(v0, ei))); + }}, OPIVI, VectorIntegerArithOp); + 0x1: vmadc_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + } + 0x18: vmseq_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] == (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x19: vmsne_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] != (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1c: vmsleu_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= (vu)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1d: vmsle_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1e: vmsgtu_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] > (vu)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1f: vmsgt_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] > (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wi({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)SIMM5 & (sew * 2 - 1))); + }}, OPIVI, VectorIntegerArithOp); + 0x2d: vnsra_wi({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)SIMM5 & (sew * 2 - 1))); + }}, OPIVI, VectorIntegerArithOp); + 0x2e: vnclipu_wi({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + 
std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = VS1 & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + // TODO: vxsat + res = max; + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVI, VectorIntegerArithOp); + 0x2f: vnclip_wi({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = VS1 & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVI, VectorIntegerArithOp); + } + } + // OPIVX + 0x4: decode VFUNCT6 { + format VectorIntFormat { + 0x0: vadd_vx({{ + Vd_vu[i] = Vs2_vu[i] + Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0x2: vsub_vx({{ + Vd_vu[i] = Vs2_vu[i] - Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0x3: vrsub_vx({{ + Vd_vu[i] = Rs1_vu - Vs2_vu[i]; + }}, OPIVX, VectorIntegerArithOp); + 0x4: vminu_vx({{ + Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu); + }}, OPIVX, VectorIntegerArithOp); + 0x5: vmin_vx({{ + Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi); + }}, OPIVX, VectorIntegerArithOp); + 0x6: vmaxu_vx({{ + Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu); + }}, OPIVX, VectorIntegerArithOp); + 0x7: vmax_vx({{ + Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi); + }}, OPIVX, VectorIntegerArithOp); + 0x9: vand_vx({{ + Vd_vu[i] = Vs2_vu[i] & Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0xa: vor_vx({{ + Vd_vu[i] = Vs2_vu[i] | Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0xb: vxor_vx({{ + Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + } + 0x0e: VectorSlideUpFormat::vslideup_vx({{ + const int offset = (int)Rs1_vu; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < 
uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + } + }}, OPIVX, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslidedown_vx({{ + const int offset = (int)Rs1_vu; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = res[i]; + } + } + } + }}, OPIVX, VectorMiscOp); + 0x0c: VectorGatherFormat::vrgather_vx({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx; + Vd_vu[i] = (Rs1_vu >= vlmax) ? 0 + : (idx < vs2_elems) ? 
Vs2_vu[idx] + : Vs3_vu[i]; + } + } + }}, OPIVX, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vxm({{ + Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei); + }}, OPIVX, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x12: decode VM { + 0x0: vsbc_vxm({{ + Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei); + }}, OPIVX, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vxm({{ + Vd_vu[i] = elem_mask(v0, ei) ? Rs1_vu : Vs2_vu[i]; + }}, OPIVX, VectorIntegerArithOp); + 0x1: decode VS2 { + 0x0: vmv_v_x({{ + Vd_vu[i] = Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + } + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vx({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x21: vsadd_vx({{ + Vd_vu[i] = sat_add(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x22: vssubu_vx({{ + Vd_vu[i] = sat_subu(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x23: vssub_vx({{ + Vd_vu[i] = sat_sub(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x27: vsmul_vx({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min; + __int128_t result = + (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i]; + result = int_rounding<__uint128_t>( + result, 0 /* TODO */, sew - 1); + result = result >> (sew - 1); + if (overflow) { + result = max; + *vxsatptr = true; + } + + Vd_vi[i] = (vi)result; + }}, OPIVX, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vx({{ + Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x28: vsrl_vx({{ + Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x29: vsra_vx({{ + Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x2a: vssrl_vx({{ + int sh = Rs1_vu & (sew - 1); + 
__uint128_t val = Vs2_vu[i]; + + val = int_rounding<__uint128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vu[i] = val >> sh; + }}, OPIVX, VectorIntegerArithOp); + 0x2b: vssra_vx({{ + int sh = Rs1_vu & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVX, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wx({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)Rs1_vu & (sew * 2 - 1))); + }}, OPIVX, VectorIntegerArithOp); + 0x2d: vnsra_wx({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)Rs1_vu & (sew * 2 - 1))); + }}, OPIVX, VectorIntegerArithOp); + 0x2e: vnclipu_wx({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = Rs1_vu & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + // TODO: vxsat + res = max; + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVX, VectorIntegerArithOp); + 0x2f: vnclip_wx({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = Rs1_vi & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVX, VectorIntegerArithOp); + } + + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vxm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], Rs1_vi, + elem_mask(v0, ei))); + }}, OPIVX, VectorIntegerArithOp); + 0x1: vmadc_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + 0x13: decode VM { + 0x0: vmsbc_vxm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], 
Rs1_vi, + elem_mask(v0, ei))); + }}, OPIVX, VectorIntegerArithOp); + 0x1: vmsbc_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + 0x18: vmseq_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] == Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x19: vmsne_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] != Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1a: vmsltu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] < Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1b: vmslt_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] < Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + 0x1c: vmsleu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1d: vmsle_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + 0x1e: vmsgtu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] > Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1f: vmsgt_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] > Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + } + // OPFVF + 0x5: decode VFUNCT6 { + format VectorFloatFormat{ + 0x00: vfadd_vf({{ + auto fd = fadd(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x02: vfsub_vf({{ + auto fd = fsub(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x04: vfmin_vf({{ + auto fd = fmin(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x06: vfmax_vf({{ + auto fd = fmax(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x08: vfsgnj_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + 
false, false).v; + }}, OPFVF, VectorFloatArithOp); + 0x09: vfsgnjn_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + true, false).v; + }}, OPFVF, VectorFloatArithOp); + 0x0a: vfsgnjx_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + false, true).v; + }}, OPFVF, VectorFloatArithOp); + } + 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + // TODO: dirty code + if (vdIdx == 0 && vs2Idx == 0 && + (this->vm || elem_mask(v0, 0))) { + tmp_d0.as()[0] = Rs1_vu; + } + } + }}, OPFVF, VectorMiscOp); + 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? 
offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1) + ? res[i] + : Rs1_vu; + } + } + } + }}, OPFVF, VectorMiscOp); + // VRFUNARY0 + 0x10: decode VS2 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vfmv.s.f are reserved + 0x1: VectorNonSplitFormat::vfmv_s_f({{ + auto fd = ftype_freg(freg(Fs1_bits)); + Vd_vu[0] = fd.v; + }}, OPFVV, VectorMiscOp); + } + } + format VectorFloatFormat{ + 0x17: decode VM { + 0x0: vfmerge_vfm({{ + Vd_vu[i] = elem_mask(v0, ei) + ? ftype_freg(freg(Fs1_bits)).v + : Vs2_vu[i]; + }}, OPFVF, VectorFloatArithOp); + 0x1: vfmv_v_f({{ + auto fd = ftype_freg(freg(Fs1_bits)); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + } + format VectorFloatMaskFormat { + 0x18: vmfeq_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + feq(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x19: vmfle_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1b: vmflt_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1c: vmfne_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + !feq(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1d: vmfgt_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]))); + }}, OPFVF, VectorFloatArithOp); + 0x1f: vmfge_vf({{ + 
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]))); + }}, OPFVF, VectorFloatArithOp); + } + format VectorFloatFormat{ + 0x20: vfdiv_vf({{ + auto fd = fdiv(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x21: vfrdiv_vf({{ + auto fd = fdiv(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x24: vfmul_vf({{ + auto fd = fmul(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x27: vfrsub_vf({{ + auto fd = fsub(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x28: vfmadd_vf({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x29: vfnmadd_vf({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype_freg(freg(Fs1_bits)), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2a: vfmsub_vf({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype_freg(freg(Fs1_bits)), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2b: vfnmsub_vf({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2c: vfmacc_vf({{ + auto fd = fmadd(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2d: vfnmacc_vf({{ + auto fd = fmadd( + fneg(ftype_freg(freg(Fs1_bits))), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i])) + ); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2e: vfmsac_vf({{ + auto fd = fmadd(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2f: vfnmsac_vf({{ + auto fd = fmadd( + fneg(ftype_freg(freg(Fs1_bits))), + 
ftype(Vs2_vu[i]), + ftype(Vs3_vu[i]) + ); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + format VectorFloatWideningFormat { + 0x30: vfwadd_vf({{ + auto fd = fadd( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x32: vfwsub_vf({{ + auto fd = fsub( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x34: vfwadd_wf({{ + auto fd = fadd( + ftype(Vs2_vwu[i]), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x36: vfwsub_wf({{ + auto fd = fsub( + ftype(Vs2_vwu[i]), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x38: vfwmul_vf({{ + auto fd = fmul( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3c: vfwmacc_vf({{ + auto fd = fmadd( + fwiden(ftype_freg(freg(Fs1_bits))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3d: vfwnmacc_vf({{ + auto fd = fmadd( + fwiden(fneg(ftype_freg(freg(Fs1_bits)))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3e: vfwmsac_vf({{ + auto fd = fmadd( + fwiden(ftype_freg(freg(Fs1_bits))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3f: vfwnmsac_vf({{ + auto fd = fmadd( + fwiden(fneg(ftype_freg(freg(Fs1_bits)))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + } + // OPMVX + 0x6: decode VFUNCT6 { + format VectorIntFormat { + 0x08: vaaddu_vx({{ + /* Averaging add: the 128-bit sum is rounded with the mode */ + /* read from MISCREG_VXRM (as vssra_vx does), then halved. */ + __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu; + res = int_rounding<__uint128_t>(res, + xc->readMiscReg(MISCREG_VXRM), 1); + Vd_vu[i] = res >> 1; + }}, OPMVX, 
VectorIntegerArithOp); + 0x09: vaadd_vx({{ + /* Averaging add: the 128-bit sum is rounded with the mode */ + /* read from MISCREG_VXRM (as vssra_vx does), then halved. */ + __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi; + res = int_rounding<__uint128_t>(res, + xc->readMiscReg(MISCREG_VXRM), 1); + Vd_vi[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + } + 0x0e: VectorSlideUpFormat::vslide1up_vx({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + // TODO: dirty code + if (vdIdx == 0 && vs2Idx == 0 && + (this->vm || elem_mask(v0, 0))) { + tmp_d0.as()[0] = Rs1_vu; + } + } + }}, OPIVX, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslide1down_vx({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? 
offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1) + ? res[i] + : Rs1_vu; + } + } + } + }}, OPIVX, VectorMiscOp); + // VRXUNARY0 + 0x10: decode VS2 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vmv.s.x are reserved. + 0x1: VectorNonSplitFormat::vmv_s_x({{ + Vd_vu[0] = Rs1_vu; + }}, OPMVX, VectorMiscOp); + } + } + format VectorIntFormat { + 0x0a: vasubu_vx({{ + /* Averaging subtract: the 128-bit difference is rounded with */ + /* the mode read from MISCREG_VXRM (as vssra_vx does), then */ + /* halved. */ + __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu; + res = int_rounding<__uint128_t>(res, + xc->readMiscReg(MISCREG_VXRM), 1); + Vd_vu[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x0b: vasub_vx({{ + /* Same rounding source as vasubu_vx above. */ + __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi; + res = int_rounding<__uint128_t>(res, + xc->readMiscReg(MISCREG_VXRM), 1); + Vd_vi[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x20: vdivu_vx({{ + if (Rs1_vu == 0) + Vd_vu[i] = (vu)-1; + else + Vd_vu[i] = Vs2_vu[i] / Rs1_vu; + }}, OPMVX, VectorIntegerArithOp); + 0x21: vdiv_vx({{ + if (Rs1_vi == 0) + Vd_vi[i] = -1; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Rs1_vi == -1) + Vd_vi[i] = Vs2_vi[i]; + else + Vd_vi[i] = Vs2_vi[i] / Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x22: vremu_vx({{ + if (Rs1_vu == 0) + Vd_vu[i] = Vs2_vu[i]; + else + Vd_vu[i] = Vs2_vu[i] % Rs1_vu; + }}, OPMVX, VectorIntegerArithOp); + 0x23: vrem_vx({{ + if (Rs1_vi == 0) + Vd_vi[i] = Vs2_vi[i]; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Rs1_vi == -1) + Vd_vi[i] = 0; + else + Vd_vi[i] = Vs2_vi[i] % Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x24: vmulhu_vx({{ + if (sew < 64) + Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Rs1_vu) 
+ >> sew; + else + Vd_vu[i] = mulhu_64(Vs2_vu[i], Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x25: vmul_vx({{ + Vd_vi[i] = Vs2_vi[i] * Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x26: vmulhsu_vx({{ + if (sew < 64) + Vd_vi[i] = ((int64_t)Vs2_vi[i] * + (uint64_t)Rs1_vu) + >> sew; + else + Vd_vi[i] = mulhsu_64(Vs2_vi[i], Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x27: vmulh_vx({{ + if (sew < 64) + Vd_vi[i] = ((int64_t)Vs2_vi[i] * Rs1_vi) + >> sew; + else + Vd_vi[i] = mulh_64(Vs2_vi[i], Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x29: vmadd_vx({{ + Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2b: vnmsub_vx({{ + Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2d: vmacc_vx({{ + Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2f: vnmsac_vx({{ + Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + } + format VectorIntWideningFormat { + 0x30: vwaddu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x31: vwadd_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x32: vwsubu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x33: vwsub_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x34: vwaddu_wx({{ + Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x35: vwadd_wx({{ + Vd_vwi[i] = Vs2_vwi[i] + vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x36: vwsubu_wx({{ + Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x37: vwsub_wx({{ + Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x38: vwmulu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x3a: vwmulsu_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * 
vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x3b: vwmul_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x3c: vwmaccu_vx({{ + Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset]) + + Vs3_vwu[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3d: vwmacc_vx({{ + Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3e: vwmaccus_vx({{ + Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3f: vwmaccsu_vx({{ + Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + } + } 0x7: decode BIT31 { format VConfOp { 0x0: vsetvli({{ diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 4bdc3021d5..0102df17d7 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -38,6 +38,7 @@ ##include "amo.isa" ##include "bs.isa" ##include "vector_conf.isa" +##include "vector_arith.isa" ##include "vector_mem.isa" // Include formats for nonstandard extensions diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa new file mode 100644 index 0000000000..c462e6c8d4 --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -0,0 +1,1319 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +let {{ + def setDestWrapper(destRegId): + return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \ + "_numTypedDestRegs[VecRegClass]++;\n" + def setSrcWrapper(srcRegId): + return "setSrcRegIdx(_numSrcRegs++, " + srcRegId + ");\n" + def setSrcVm(): + return "if (!this->vm)\n" + \ + " setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);" + def vmDeclAndReadData(): + return ''' + [[maybe_unused]] RiscvISA::vreg_t tmp_v0; + [[maybe_unused]] uint8_t* v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + ''' + def copyOldVd(vd_idx): + return 'COPY_OLD_VD(%d);' % vd_idx + def loopWrapper(code, micro_inst = True): + if micro_inst: + upper_bound = "this->microVl" + else: + upper_bound = "(uint32_t)machInst.vl" + return ''' + for (uint32_t i = 0; i < %s; i++) { + %s + } + ''' % (upper_bound, code) + def maskCondWrapper(code): + return "if (this->vm || elem_mask(v0, ei)) {\n" + \ + code + "}\n" + def eiDeclarePrefix(code, widening = False): + if widening: + return ''' + uint32_t ei = i + micro_vlmax * this->microIdx; + ''' + code + else: + return ''' + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + ''' + code + + def wideningOpRegisterConstraintChecks(code): + return ''' + const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1); + if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) { + std::string error = + csprintf("Unaligned Vd group in Widening op"); + return std::make_shared(error, machInst); + } + if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) { + // A destination vector register group can overlap a source vector + // register group if The destination EEW is greater than the source + // EEW, the source EMUL is at least 1, and the overlap is in the + // highest- numbered part of the destination register group. 
+ std::string error = + csprintf("Unsupported overlap in Vs2 and Vd for Widening op"); + return std::make_shared(error, machInst); + } + ''' + code + + def narrowingOpRegisterConstraintChecks(code): + return ''' + const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1); + if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) { + std::string error = + csprintf("Unaligned VS2 group in Narrowing op"); + return std::make_shared(error, machInst); + } + if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (VS2 + num_microops - 1))) { + // A destination vector register group can overlap a source vector + // register group The destination EEW is smaller than the source EEW + // and the overlap is in the lowest-numbered part of the source + // register group + std::string error = + csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op"); + return std::make_shared(error, machInst); + } + ''' + code + + def fflags_wrapper(code): + return ''' + RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS); + std::feclearexcept(FE_ALL_EXCEPT); + ''' + code + ''' + FFLAGS |= softfloat_exceptionFlags; + softfloat_exceptionFlags = 0; + xc->setMiscReg(MISCREG_FFLAGS, FFLAGS); + ''' +}}; + + +def format VectorIntFormat(code, category, *flags) {{ + macroop_class_name = 'VectorArithMacroInst' + microop_class_name = 'VectorArithMicroInst' + + if name == "vid_v" : + macroop_class_name = 'VectorVMUNARY0MacroInst' + microp_class_name = 'VectorVMUNARY0MicroInst' + + iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = inst_name not in ["vmv"] + mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim']) + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + num_src_regs = 0 + + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + num_src_regs += 1 + + src1_reg_id = "" + if category in ["OPIVV", 
"OPMVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + num_src_regs += 1 + elif category in ["OPIVX", "OPMVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + num_src_regs += 1 + elif category == "OPIVI": + pass + else: + error("not supported category for VectorIntFormat: %s" % category) + + old_vd_idx = num_src_regs + src3_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + microop_class_name, + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorIntMicroDeclare.subst(microiop) + \ + VectorIntMicroConstructor.subst(microiop) + \ + VectorIntMicroExecute.subst(microiop) + \ + VectorIntMacroDeclare.subst(iop) + \ + VectorIntMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + + +def format VectorIntExtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + ext_div = int(inst_suffix[-1]) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / " + \ + str(ext_div) + "]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'ext_div': ext_div}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorIntExtMicroDeclare.subst(microiop) + \ + VectorIntMicroConstructor.subst(microiop) + \ + VectorIntExtMicroExecute.subst(microiop) + \ + VectorIntExtMacroDeclare.subst(iop) + \ + VectorIntMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorIntWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = True + mask_cond = v0_required + need_elem_idx = mask_cond or code.find("ei") != -1 + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPIVV", "OPMVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPIVX", "OPMVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "" + if inst_suffix in ["vv", "vx"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + elif inst_suffix in ["wv", "wx"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + + code = wideningOpRegisterConstraintChecks(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': 
copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorIntWideningMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntNarrowingFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + mask_cond = True + need_elem_idx = True + + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + # code + code = maskCondWrapper(code) + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + code = narrowingOpRegisterConstraintChecks(code) + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + }, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler 
happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorIntNarrowingMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntMaskFormat(code, category, *flags) {{ + iop = InstObjParams(name, + Name, + 'VectorArithMacroInst', + {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = not (inst_name in ["vmadc", "vmsbc"] \ + and inst_suffix in ["vv", "vx", "vi"]) + mask_cond = inst_name not in ['vmadc', 'vmsbc'] + need_elem_idx = mask_cond or code.find("ei") != -1 + + old_vd_idx = 2 + dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" + src1_reg_id = "" + if category == "OPIVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPIVX": + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + #code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) 
+ + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntMaskMicroDeclare.subst(microiop) + \ + VectorIntMaskMicroConstructor.subst(microiop) + \ + VectorIntMaskMicroExecute.subst(microiop) + \ + VectorIntMaskMacroDeclare.subst(iop) + \ + VectorIntMaskMacroConstructor.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorGatherFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + if inst_name == "vrgatherei16": + idx_type = "uint16_t" + else: + idx_type = "elem_type" + iop = InstObjParams(name, Name, 'VectorArithMacroInst', + {'idx_type': idx_type, + 'code': code}, + flags) + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + vd_idx]" + src1_reg_id = "" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + vs1_idx]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + vd_idx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + + # code + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'idx_type': idx_type}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorGatherMicroDeclare.subst(microiop) + \ + VectorGatherMicroConstructor.subst(microiop) + \ + VectorGatherMicroExecute.subst(microiop) + \ + VectorGatherMacroDeclare.subst(iop) + \ + VectorGatherMacroConstructor.subst(iop) + + decode_block = VectorGatherDecodeBlock.subst(iop) + +}}; + +def format VectorFloatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = inst_name not in ["vfmv"] + mask_cond = v0_required and (inst_suffix not in ['vvm', 'vfm']) + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category == "OPFVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPFVF": + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatMicroExecute.subst(microiop) + \ + VectorFloatMacroDeclare.subst(iop) + \ + VectorFloatMacroConstructor.subst(iop) + + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorFloatCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorFloatMacroConstructor.subst(iop) + + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorFloatWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = True + mask_cond = v0_required + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPFVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPFVF"]: + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "" + if inst_suffix in ["vv", "vf"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + elif inst_suffix in ["wv", "wf"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + code = fflags_wrapper(code) + + code = wideningOpRegisterConstraintChecks(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 
'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorFloatWideningMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatWideningCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatWideningMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + code = narrowingOpRegisterConstraintChecks(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatNarrowingMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatMaskFormat(code, category, *flags) {{ + iop = InstObjParams(name, + Name, + 'VectorArithMacroInst', + {'code': code}, + flags) + dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" + src1_reg_id = "" + if category == "OPFVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPFVF": + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorFloatMaskMicroDeclare.subst(microiop) + \ + VectorFloatMaskMicroConstructor.subst(microiop) + \ + VectorFloatMaskMicroExecute.subst(microiop) + \ + VectorFloatMaskMacroDeclare.subst(iop) + \ + VectorFloatMaskMacroConstructor.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VMvWholeFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VMvWholeMacroInst', {'code': code}, flags) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VMvWholeMicroInst', + {'code': code}, + flags) + + header_output = \ + VMvWholeMacroDeclare.subst(iop) + \ + VMvWholeMicroDeclare.subst(microiop) + decoder_output = \ + VMvWholeMacroConstructor.subst(iop) + \ + VMvWholeMicroConstructor.subst(microiop) + exec_output = VMvWholeMicroExecute.subst(microiop) + decode_block = BasicDecode.subst(iop) +}}; + +def format ViotaFormat(code, category, *flags){{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. 
+ old_dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + + microiop = InstObjParams(name+"_micro", + Name+"Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'copy_old_vd': copyOldVd(1)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + ViotaMicroDeclare.subst(microiop) + \ + ViotaMicroConstructor.subst(microiop) + \ + ViotaMicroExecute.subst(microiop)+\ + ViotaMacroDeclare.subst(iop) + \ + ViotaMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) + +}}; + +def format Vector1Vs1VdMaskFormat(code, category, *flags){{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src2_reg_id = "vecRegClass[_machInst.vs2]" + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'copy_old_vd': copyOldVd(1)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + Vector1Vs1RdMaskDeclare.subst(iop) + \ + Vector1Vs1VdMaskConstructor.subst(iop) + \ + Vector1Vs1VdMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format Vector1Vs1RdMaskFormat(code, category, *flags){{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + Vector1Vs1RdMaskDeclare.subst(iop) + \ + Vector1Vs1RdMaskConstructor.subst(iop) + \ + Vector1Vs1RdMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format VectorNonSplitFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + vm_decl_rd = "" + + set_vm_idx = "" + + if inst_name == "vfmv" : + code = fflags_wrapper(code) + + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx}, + flags) + + + if inst_name == "vfmv" : + execute_block = VectorFloatNonSplitExecute.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) + elif inst_name == "vmv" : + execute_block = VectorIntNonSplitExecute.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) + else : + error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorNonSplitDeclare.subst(iop) + \ + VectorNonSplitConstructor.subst(iop) + \ + execute_block + +}}; + +def format VectorMaskFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + old_vd_idx = 2 + if category not in ["OPMVV"]: + error("not supported category for VectorIntFormat: %s" % category) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2]" + + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + # TODO: remove it + old_dest_reg_id = "vecRegClass[_machInst.vd]" + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + code = loopWrapper(code, micro_inst = False) + + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorMaskDeclare.subst(iop) + \ + VectorMaskConstructor.subst(iop) + \ + VectorMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format VectorReduceIntFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + ''' + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceIntMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorReduceFloatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using et = ElemType; + using vu = decltype(et::v); + ''' + + code = fflags_wrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceFloatMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorReduceFloatWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + ''' + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceFloatWideningMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntVxsatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntVxsatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorIntVxsatMicroDeclare.subst(microiop) + \ + VectorIntVxsatMicroConstructor.subst(microiop) + \ + VectorIntMicroExecute.subst(microiop) + \ + VectorIntVxsatMacroDeclare.subst(iop) + \ + VectorIntVxsatMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorReduceIntWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceIntWideningMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +let {{ + +def VectorSlideBase(name, Name, category, code, flags, macro_construtor, + decode_template, micro_execute_template): + macroop_class_name = 'VectorSlideMacroInst' + microop_class_name = 'VectorSlideMicroInst' + # Make sure flags are in lists (convert to lists if not). + flags = makeList(flags) + iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + vs2Idx]" + src1_ireg_id = "intRegClass[_machInst.rs1]" + src1_freg_id = "floatRegClass[_machInst.rs1]" + + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. 
+ num_src_regs = 0 + + old_dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" + set_src_reg_idx = "" + if category in ["OPIVX", "OPMVX"]: + set_src_reg_idx += setSrcWrapper(src1_ireg_id) + num_src_regs += 1 + elif category in ["OPFVF"]: + set_src_reg_idx += setSrcWrapper(src1_freg_id) + num_src_regs += 1 + set_src_reg_idx += setSrcWrapper(src2_reg_id) + num_src_regs += 1 + old_vd_idx = num_src_regs + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_src_reg_idx += setSrcVm() + microiop = InstObjParams(name + "_micro", + Name + "Micro", + microop_class_name, + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. 
+ header_output = \ + VectorSlideMicroDeclare.subst(microiop) + \ + VectorSlideMicroConstructor.subst(microiop) + \ + micro_execute_template.subst(microiop) + \ + VectorSlideMacroDeclare.subst(iop) + \ + macro_construtor.subst(iop) + + decode_block = decode_template.subst(iop) + return (header_output, decode_block) + +}}; + +def format VectorSlideUpFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) +}}; + +def format VectorSlideDownFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) +}}; + +def format VectorFloatSlideUpFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) +}}; + +def format VectorFloatSlideDownFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) +}}; diff --git a/src/arch/riscv/isa/templates/templates.isa b/src/arch/riscv/isa/templates/templates.isa index b4de46d846..ed3f5287c0 100644 --- a/src/arch/riscv/isa/templates/templates.isa +++ b/src/arch/riscv/isa/templates/templates.isa @@ -1,2 +1,32 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + // Include ##include "vector_mem.isa" +##include "vector_arith.isa" diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa new file mode 100644 index 0000000000..d15ab70f20 --- /dev/null +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -0,0 +1,1989 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +output header {{ + +#define ASSIGN_VD_BIT(idx, bit) \ + ((Vd[(idx)/8] & ~(1 << (idx)%8)) | ((bit) << (idx)%8)) + +#define COPY_OLD_VD(idx) \ + [[maybe_unused]] RiscvISA::vreg_t old_vd; \ + [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ + xc->getRegOperand(this, (idx), &old_vd); \ + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, VLENB); + +#define VRM_REQUIRED \ + uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ + if (frm > 4) \ + return std::make_shared("RM fault", machInst); \ + softfloat_roundingMode = frm; + +template +bool inline +carry_out(Type a, Type b, bool carry_in = false) { + using TypeU = std::make_unsigned_t; + TypeU s = *reinterpret_cast(&a) + + *reinterpret_cast(&b) + carry_in; + return carry_in + ? (s <= *reinterpret_cast(&a)) + : (s < *reinterpret_cast(&a)); +} + +template +bool inline +borrow_out(Type a, Type b, bool borrow_in = false) { + using TypeU = std::make_unsigned_t; + return borrow_in + ? (*reinterpret_cast(&a) <= *reinterpret_cast(&b)) + : (*reinterpret_cast(&a) < *reinterpret_cast(&b)); +} + +}}; + +def template VectorIntMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + 
microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorIntMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx + // vs2, (old_vd), vm for *.vi + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorIntExtMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if 
(machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorIntExtMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorIntExtMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + auto SEW = vtype_SEW(vtype); + auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { + case 8: { + using vext [[maybe_unused]] = int8_t; + using vextu [[maybe_unused]] = uint8_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + case 16: { + using vext [[maybe_unused]] = int16_t; + using vextu [[maybe_unused]] = uint16_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + case 32: { + using vext [[maybe_unused]] = int32_t; + using vextu [[maybe_unused]] = uint32_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + default: break; + } + + return NoFault; +} + +}}; + +def template VectorIntDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new 
%(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntWideningMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntWideningMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const int64_t vlmul = vtype_vlmul(_machInst.vtype8); + // Todo: move to Decode template + panic_if(vlmul == 3, "LMUL=8 is illegal for widening inst"); + // when LMUL setted as m1, need to split to 2 micro insts + const uint32_t num_microops = 1 << std::max(0, vlmul + 1); + + int32_t tmp_vl = this->vl; + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? 
t_micro_vlmax : t_micro_vlmax / 2; + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorIntWideningMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntWideningMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t 
t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorIntNarrowingMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorIntWideningDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorFloatMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMacroConstructor {{ +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorFloatMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def 
template VectorFloatMicroConstructor {{ +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorFloatMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorFloatDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorFloatCvtMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorFloatCvtMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << 
registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + + +def template VectorFloatWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatNarrowingMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 
0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatWideningDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template ViotaMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + int cnt = 0; + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + + +def template ViotaMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + + StaticInstPtr microop; + + // Allow one empty micro op to hold IsLastMicroop flag + for (int i = 0; i < num_microops && micro_vl >= 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i, + &cnt); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template ViotaMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; + int* cnt; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, int* cnt); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template ViotaMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, int* 
cnt) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + this->cnt = cnt; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2]); +} + +}}; + +def template ViotaMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + + +def template Vector1Vs1VdMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; + %(set_vm_idx)s; +} + +}}; + +def template Vector1Vs1VdMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = uint8_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +}; + +}}; + +def template Vector1Vs1RdMaskDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[2]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template Vector1Vs1RdMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + 
%(constructor)s; + %(set_vm_idx)s; +} + +}}; + +def template Vector1Vs1RdMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_rd)s; + uint64_t Rd = 0; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +}; + +}}; + +def template VectorIntMaskMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMaskMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size()); + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorIntMaskMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1(rs1), vs2, old_vd, v0 for *.vv[m] or *.vx[m] + // vs2, old_vd, v0 for *.vi[m] + RegId srcRegIdxArr[4]; + RegId 
destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMaskMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntMaskMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t offset = bit_offset * microIdx; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatMaskMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMaskMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + 
this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size()); + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorFloatMaskMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1(rs1), vs2, old_vd, v0 for *.vv or *.vf + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMaskMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorFloatMaskMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t offset = bit_offset * microIdx; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VMvWholeMacroDeclare {{ + +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + 
%(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMvWholeMacroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = _machInst.simm3 + 1; + StaticInstPtr microop; + + for (int i = 0; i < num_microops; ++i) { + microop = new %(class_name)sMicro(_machInst, 0, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VMvWholeMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[1]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMvWholeMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _microIdx]); +} + +}}; + +def template VMvWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext* xc, trace::InstRecord* traceData) const +{ + // TODO: Check register alignment. + // TODO: If vd is equal to vs2 the instruction is an architectural NOP. 
+ %(op_decl)s; + %(op_rd)s; + for (size_t i = 0; i < (VLEN / 64); i++) { + %(code)s; + } + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorMaskDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = uint8_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + // TODO: remove it + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; + +def template VectorMaskDecodeBlock {{ + +return new %(class_name)s(machInst); + +}}; + +def template VectorNonSplitDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[2]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorNonSplitConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + %(set_vm_idx)s; +} + +}}; + +def template VectorIntNonSplitExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = 
std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatNonSplitExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorReduceMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorReduceMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, 
uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorReduceMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorReduceIntMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + microop_result = f(microop_result, Vs2[i]); + } + } + return microop_result; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceFloatMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? 
old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]).v; + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceFloatWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vwu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]).v; + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorGatherMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorGatherMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + constexpr uint32_t vd_eewb = sizeof(ElemType); + constexpr uint32_t vs2_eewb = sizeof(ElemType); + constexpr uint32_t vs1_eewb = sizeof(IndexType); + constexpr bool vs1_split = vd_eewb > vs1_eewb; + const int8_t lmul = vtype_vlmul(vtype); + const int8_t vs1_emul = lmul + + (vs1_split ? -(vs2_eewb / vs1_eewb) : vs1_eewb / vs2_eewb); + const uint8_t vs2_vregs = lmul < 0 ? 
1 : 1 << lmul; + const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul; + const uint8_t vd_vregs = vs2_vregs; + const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; i < std::max(vs1_vregs, vd_vregs) && micro_vl > 0; + i++) { + for (uint8_t j = 0; j < vs2_vregs; j++) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, i * vs2_vregs + j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorGatherMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorGatherMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + [[maybe_unused]] constexpr uint32_t vd_eewb = sizeof(ElemType); + [[maybe_unused]] constexpr uint32_t vs2_eewb = sizeof(ElemType); + [[maybe_unused]] constexpr uint32_t vs1_eewb = sizeof(IndexType); + constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; + constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; + const int8_t lmul = vtype_vlmul(vtype); + 
const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; + [[maybe_unused]] const uint8_t vs2_idx = _microIdx % vs2_vregs; + [[maybe_unused]] const uint8_t vs1_idx = + _microIdx / vs2_vregs / vs1_split_num; + [[maybe_unused]] const uint8_t vd_idx = + _microIdx / vs2_vregs / vd_split_num; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorGatherMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + const uint32_t vlmax = vtype_VLMAX(vtype); + constexpr uint8_t vd_eewb = sizeof(ElemType); + constexpr uint8_t vs1_eewb = sizeof(IndexType); + constexpr uint8_t vs2_eewb = sizeof(ElemType); + constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; + constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; + [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; + [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; + [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); + [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 
1 : 1 << lmul; + [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; + [[maybe_unused]] const uint8_t vs1_idx = + microIdx / vs2_vregs / vs1_split_num; + [[maybe_unused]] const uint8_t vd_idx = + microIdx / vs2_vregs / vd_split_num; + [[maybe_unused]] const uint16_t vs1_bias = + vs1_elems * (vd_idx % vs1_split_num) / vs1_split_num; + [[maybe_unused]] const uint16_t vd_bias = + vd_elems * (vs1_idx % vd_split_num) / vd_split_num; + + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorGatherDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + using elem_type [[maybe_unused]] = uint8_t; + return new %(class_name)s(machInst); + } + case 0b001: { + using elem_type [[maybe_unused]] = uint16_t; + return new %(class_name)s(machInst); + } + case 0b010: { + using elem_type [[maybe_unused]] = uint32_t; + return new %(class_name)s(machInst); + } + case 0b011: { + using elem_type [[maybe_unused]] = uint64_t; + return new %(class_name)s(machInst); + } + default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntVxsatMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s{ +private: + %(reg_idx_arr_decl)s; + bool vxsat = false; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntVxsatMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, + micro_vl, i, 
&vxsat); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + microop = new VxsatMicroInst(&vxsat, _machInst); + microop->setFlag(StaticInst::IsSerializeAfter); + microop->setFlag(StaticInst::IsNonSpeculative); + this->microops.push_back(microop); + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorIntVxsatMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; + bool* vxsatptr; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, bool* vxsatptr); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntVxsatMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + this->vxsatptr = vxsatptr; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorReduceIntWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? 
old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vwu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]); + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorSlideMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorSlideUpMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + // Todo static filter out useless uop + int micro_idx = 0; + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + for (int j = 0; j <= i; ++j) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, micro_idx++, i, j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorSlideDownMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const 
int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + // Todo static filter out useless uop + int micro_idx = 0; + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + for (int j = i; j < num_microops; ++j) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, micro_idx++, i, j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorSlideMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vs3(old_vd), vm for *.vv, *.vx + // vs2, (old_vd), vm for *.vi + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorSlideMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, + _microIdx, _vdIdx, _vs2Idx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorSlideMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + 
[[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; + +def template VectorFloatSlideMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index d54243ad7d..f8be1e555b 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -1,3 +1,31 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + def template VMemMacroDeclare {{ class %(class_name)s : public %(base_class)s diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh index 4809372070..cca9e1be2f 100644 --- a/src/arch/riscv/regs/float.hh +++ b/src/arch/riscv/regs/float.hh @@ -211,6 +211,20 @@ const std::vector RegNames = { } // namespace float_reg +inline float32_t +fsgnj32(float32_t a, float32_t b, bool n, bool x) { + if (n) b.v = ~b.v; + else if (x) b.v = a.v ^ b.v; + return f32(insertBits(b.v, 30, 0, a.v)); +} + +inline float64_t +fsgnj64(float64_t a, float64_t b, bool n, bool x) { + if (n) b.v = ~b.v; + else if (x) b.v = a.v ^ b.v; + return f64(insertBits(b.v, 62, 0, a.v)); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 1db6d6df3b..40054aec0f 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -241,6 +241,13 @@ remu(T rs1, T rs2) return (rs2 == 0) ? 
rs1 : rs1 % rs2; } +// Vector extension functions +inline uint64_t +vtype_SEW(const uint64_t vtype) +{ + return 8 << bits(vtype, 5, 3); +} + /* * Encode LMUL to lmul as follows: * LMUL vlmul lmul @@ -269,6 +276,25 @@ vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); } +inline int64_t +vtype_vlmul(const uint64_t vtype) +{ + return (int64_t)sext<3>(bits(vtype, 2, 0)); +} + +inline uint64_t +vtype_regs_per_group(const uint64_t vtype) +{ + int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); + return 1 << std::max(0, lmul); +} + +inline void +vtype_set_vill(uint64_t& vtype) +{ + vtype = (uint64_t)0 ^ (1UL << (sizeof(RegVal) * 8 - 1)); +} + inline uint64_t width_EEW(uint64_t width) { @@ -296,6 +322,461 @@ elem_mask(const T* vs, const int index) return (vs[idx] >> pos) & 1; } +template struct double_width; +template<> struct double_width { using type = uint16_t;}; +template<> struct double_width { using type = uint32_t;}; +template<> struct double_width { using type = uint64_t;}; +template<> struct double_width { using type = int16_t; }; +template<> struct double_width { using type = int32_t; }; +template<> struct double_width { using type = int64_t; }; +template<> struct double_width { using type = float64_t;}; + +template struct double_widthf; +template<> struct double_widthf { using type = float64_t;}; +template<> struct double_widthf { using type = float64_t;}; + +template auto +ftype(IntType a) -> FloatType +{ + if constexpr(std::is_same_v) + return f32(a); + else if constexpr(std::is_same_v) + return f64(a); + GEM5_UNREACHABLE; +} + +// TODO: Consolidate ftype_freg(freg_t a) and ftype(IntType a) into a +// single function +template auto +ftype_freg(freg_t a) -> FloatType +{ + if constexpr(std::is_same_v) + return f32(a); + else if constexpr(std::is_same_v) + return f64(a); + GEM5_UNREACHABLE; +} + +template FloatType +fadd(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_add(a, b); 
+ else if constexpr(std::is_same_v) + return f64_add(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fsub(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_sub(a, b); + else if constexpr(std::is_same_v) + return f64_sub(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmin(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_min(a, b); + else if constexpr(std::is_same_v) + return f64_min(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmax(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_max(a, b); + else if constexpr(std::is_same_v) + return f64_max(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fdiv(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_div(a, b); + else if constexpr(std::is_same_v) + return f64_div(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmul(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_mul(a, b); + else if constexpr(std::is_same_v) + return f64_mul(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fsqrt(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_sqrt(a); + else if constexpr(std::is_same_v) + return f64_sqrt(a); + GEM5_UNREACHABLE; +} + +template FloatType +frsqrte7(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_rsqrte7(a); + else if constexpr(std::is_same_v) + return f64_rsqrte7(a); + GEM5_UNREACHABLE; +} + +template FloatType +frecip7(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_recip7(a); + else if constexpr(std::is_same_v) + return f64_recip7(a); + GEM5_UNREACHABLE; +} + +template FloatType +fclassify(FloatType a) +{ + if constexpr(std::is_same_v) + return f32(f32_classify(a)); + else if constexpr(std::is_same_v) + return f64(f64_classify(a)); + GEM5_UNREACHABLE; +} + +template FloatType +fsgnj(FloatType a, FloatType b, bool n, bool x) +{ + if constexpr(std::is_same_v) + return fsgnj32(a, b, n, x); + else if constexpr(std::is_same_v) + return 
fsgnj64(a, b, n, x); + GEM5_UNREACHABLE; +} + +template bool +fle(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_le(a, b); + else if constexpr(std::is_same_v) + return f64_le(a, b); + GEM5_UNREACHABLE; +} + +template bool +feq(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_eq(a, b); + else if constexpr(std::is_same_v) + return f64_eq(a, b); + GEM5_UNREACHABLE; +} + +template bool +flt(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_lt(a, b); + else if constexpr(std::is_same_v) + return f64_lt(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmadd(FloatType a, FloatType b, FloatType c) +{ + if constexpr(std::is_same_v) + return f32_mulAdd(a, b, c); + else if constexpr(std::is_same_v) + return f64_mulAdd(a, b, c); + GEM5_UNREACHABLE; +} + +template FloatType +fneg(FloatType a) +{ + if constexpr(std::is_same_v) + return f32(a.v ^ uint32_t(mask(31, 31))); + else if constexpr(std::is_same_v) + return f64(a.v ^ mask(63, 63)); + GEM5_UNREACHABLE; +} + +template::type> WFT +fwiden(FT a) +{ + if constexpr(std::is_same_v) + return f32_to_f64(a); + GEM5_UNREACHABLE; +} + +template IntType +f_to_ui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f32_to_ui32(a, mode, true); + else if constexpr(std::is_same_v) + return f64_to_ui64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> IntType +f_to_wui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f32_to_ui64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> IntType +f_to_nui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f64_to_ui32(a, mode, true); + GEM5_UNREACHABLE; +} + +template IntType +f_to_i(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint32_t)f32_to_i32(a, mode, true); + else 
if constexpr(std::is_same_v) + return (uint64_t)f64_to_i64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> IntType +f_to_wi(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint64_t)f32_to_i64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> IntType +f_to_ni(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint32_t)f64_to_i32(a, mode, true); + GEM5_UNREACHABLE; +} + +template +FloatType +ui_to_f(IntType a) +{ + if constexpr(std::is_same_v) + return ui32_to_f32(a); + else if constexpr(std::is_same_v) + return ui64_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> FloatType +ui_to_wf(IntType a) +{ + if constexpr(std::is_same_v) + return ui32_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> FloatType +ui_to_nf(IntType a) +{ + if constexpr(std::is_same_v) + return ui64_to_f32(a); + GEM5_UNREACHABLE; +} + +template +FloatType +i_to_f(IntType a) +{ + if constexpr(std::is_same_v) + return i32_to_f32((int32_t)a); + else if constexpr(std::is_same_v) + return i64_to_f64((int64_t)a); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> FloatType +i_to_wf(IntType a) +{ + if constexpr(std::is_same_v) + return i32_to_f64((int32_t)a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = std::make_signed_t< + decltype(double_width::type::v) + > +> FloatType +i_to_nf(IntType a) +{ + if constexpr(std::is_same_v) + return i64_to_f32(a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename FloatWType = typename double_width::type +> FloatWType +f_to_wf(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_to_f64(a); + 
GEM5_UNREACHABLE; +} + +template< + typename FloatNType, + typename FloatType = typename double_width::type +> FloatNType +f_to_nf(FloatType a) +{ + if constexpr(std::is_same_v) + return f64_to_f32(a); + GEM5_UNREACHABLE; +} + +//ref: https://locklessinc.com/articles/sat_arithmetic/ +template T +sat_add(T x, T y, bool* sat) +{ + using UT = std::make_unsigned_t; + UT ux = x; + UT uy = y; + UT res = ux + uy; + + int sh = sizeof(T) * 8 - 1; + + ux = (ux >> sh) + (((UT)0x1 << sh) - 1); + + if ((T) ((ux ^ uy) | ~(uy ^ res)) >= 0) { + res = ux; + *sat = true; + } + return res; +} + +template T +sat_sub(T x, T y, bool* sat) +{ + using UT = std::make_unsigned_t; + UT ux = x; + UT uy = y; + UT res = ux - uy; + + int sh = sizeof(T) * 8 - 1; + + ux = (ux >> sh) + (((UT)0x1 << sh) - 1); + + if ((T) ((ux ^ uy) & (ux ^ res)) < 0) { + res = ux; + *sat = true; + } + return res; +} + +template T +sat_addu(T x, T y, bool* sat) +{ + T res = x + y; + + bool t = res < x; + if (false == *sat){ + *sat = t; + } + res |= -(res < x); + + return res; +} + +template T +sat_subu(T x, T y, bool* sat) +{ + T res = x - y; + + bool t = !(res <= x); + if (false == *sat){ + *sat = t; + } + + res &= -(res <= x); + + return res; +} + +/** + * Ref: + * https://github.com/riscv-software-src/riscv-isa-sim + */ +template T +int_rounding(T result, uint8_t xrm, unsigned gb) { + const uint64_t lsb = 1UL << gb; + const uint64_t lsb_half = lsb >> 1; + switch (xrm) { + case 0 /* RNU */: + result += lsb_half; + break; + case 1 /* RNE */: + if ((result & lsb_half) && + ((result & (lsb_half - 1)) || (result & lsb))) + result += lsb; + break; + case 2 /* RDN */: + break; + case 3 /* ROD */: + if (result & (lsb - 1)) + result |= lsb; + break; + default: + panic("Invalid xrm value %d", (int)xrm); + } + + return result; +} + } // namespace RiscvISA } // namespace gem5