From d96df4025323e0173d32b5bba9086fea28c8444a Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 1 Aug 2023 16:22:44 -0700 Subject: [PATCH 01/10] stdlib: Added support for JSON via env variables. Change-Id: I5791e6d51b3b9f68eb212a46c4cd0add23668340 Co-authored-by: Kunal Pai --- src/python/gem5/resources/client.py | 33 ++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/python/gem5/resources/client.py b/src/python/gem5/resources/client.py index ab8262bf92..c43dd76ac5 100644 --- a/src/python/gem5/resources/client.py +++ b/src/python/gem5/resources/client.py @@ -30,7 +30,7 @@ import os from typing import Optional, Dict, List from .client_api.client_wrapper import ClientWrapper from gem5.gem5_default_config import config -from m5.util import inform +from m5.util import inform, warn from _m5 import core @@ -53,6 +53,14 @@ clientwrapper = None def _get_clientwrapper(): global clientwrapper if clientwrapper is None: + if ( + "GEM5_RESOURCE_JSON" in os.environ + and "GEM5_RESOURCE_JSON_APPEND" in os.environ + ): + raise Exception( + "Both GEM5_RESOURCE_JSON and GEM5_RESOURCE_JSON_APPEND are set. Please set only one of them." + ) + # First check if the config file path is provided in the environment variable if "GEM5_CONFIG" in os.environ: config_file_path = Path(os.environ["GEM5_CONFIG"]) @@ -68,6 +76,29 @@ def _get_clientwrapper(): else: gem5_config = config inform("Using default config") + + # If the GEM5_RESOURCE_JSON_APPEND is set, append the resources to the gem5_config + if "GEM5_RESOURCE_JSON_APPEND" in os.environ: + json_source = { + "url": os.environ["GEM5_RESOURCE_JSON_APPEND"], + "isMongo": False, + } + gem5_config["sources"].update( + {"GEM5_RESOURCE_JSON_APPEND": json_source} + ) + inform( + f"Appending resources from {os.environ['GEM5_RESOURCE_JSON_APPEND']}" + ) + # If the GEM5_RESOURCE_JSON is set, use it as the only source + elif "GEM5_RESOURCE_JSON" in os.environ: + json_source = { + "url": os.environ["GEM5_RESOURCE_JSON"], + "isMongo": False, + } + gem5_config["sources"] = {"GEM5_RESOURCE_JSON": json_source} + warn( + f"No config sources are used, Using resources from {os.environ['GEM5_RESOURCE_JSON']}" + ) clientwrapper = ClientWrapper(gem5_config) return clientwrapper From 32b7ffc4546fd53bc50487d1899188844177bb1d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 1 Aug 2023 17:22:35 -0700 Subject: [PATCH 02/10] stdlib: fixed warning message Change-Id: I04ef23529d7afc5d46fbba7558279ec08acd629a Co-authored-by: paikunal --- src/python/gem5/resources/client.py | 37 +++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/python/gem5/resources/client.py b/src/python/gem5/resources/client.py index c43dd76ac5..571a8254e0 100644 --- a/src/python/gem5/resources/client.py +++ b/src/python/gem5/resources/client.py @@ -60,9 +60,31 @@ def _get_clientwrapper(): raise Exception( "Both GEM5_RESOURCE_JSON and GEM5_RESOURCE_JSON_APPEND are set. Please set only one of them." ) - + gem5_config = {} + # If the GEM5_RESOURCE_JSON is set, use it as the only source + if "GEM5_RESOURCE_JSON" in os.environ: + json_source = { + "url": os.environ["GEM5_RESOURCE_JSON"], + "isMongo": False, + } + gem5_config["sources"] = {"GEM5_RESOURCE_JSON": json_source} + if "GEM5_CONFIG" in os.environ: + warn( + f"Both GEM5_CONFIG and GEM5_RESOURCE_JSON are set.\n" + f"GEM5_CONFIG will be ignored in favor of the GEM5_RESOURCE_JSON environment variable." + ) + elif (Path().cwd().resolve() / "gem5-config.json").exists(): + warn( + f"Both gem5-config.json and GEM5_RESOURCE_JSON are set.\n" + f"gem5-config.json will be ignored in favor of the GEM5_RESOURCE_JSON environment variable." + ) + else: + warn( + f"GEM5_RESOURCE_JSON is set.\n" + f"gem5-default-config will be ignored in favor of the GEM5_RESOURCE_JSON environment variable." + ) # First check if the config file path is provided in the environment variable - if "GEM5_CONFIG" in os.environ: + elif "GEM5_CONFIG" in os.environ: config_file_path = Path(os.environ["GEM5_CONFIG"]) gem5_config = getFileContent(config_file_path) inform("Using config file specified by $GEM5_CONFIG") @@ -89,16 +111,7 @@ def _get_clientwrapper(): inform( f"Appending resources from {os.environ['GEM5_RESOURCE_JSON_APPEND']}" ) - # If the GEM5_RESOURCE_JSON is set, use it as the only source - elif "GEM5_RESOURCE_JSON" in os.environ: - json_source = { - "url": os.environ["GEM5_RESOURCE_JSON"], - "isMongo": False, - } - gem5_config["sources"] = {"GEM5_RESOURCE_JSON": json_source} - warn( - f"No config sources are used, Using resources from {os.environ['GEM5_RESOURCE_JSON']}" - ) + clientwrapper = ClientWrapper(gem5_config) return clientwrapper From 73892c9b47984b1a1e4441ce4141aa0576147bf6 Mon Sep 17 00:00:00 2001 From: Xuan Hu Date: Tue, 21 Mar 2023 13:11:01 +0800 Subject: [PATCH 03/10] arch-riscv: Add risc-v vector regs and configs This commit add regs and configs for vector extension * Add 32 vector arch regs as spec defined and 8 internal regs for uop-based vector implementation. * Add default vector configs(VLEN = 256, ELEN = 64). These cannot be changed yet, since the vector implementation has only be tested with such configs. * Add disassamble register name v0~v31 and vtmp0~vtmp7. * Add CSR registers defined in RISCV Vector Spec v1.0. * Add vector bitfields. * Add vector operand_types and operands. Change-Id: I7bbab1ee9e0aa804d6f15ef7b77fac22d4f7212a Co-authored-by: Yang Liu Co-authored-by: Fan Yang <1209202421@qq.com> Co-authored-by: Jerin Joy arch-riscv: enable rvv flags only for RV64 Change-Id: I6586e322dfd562b598f63a18964d17326c14d4cf --- src/arch/riscv/faults.hh | 2 +- src/arch/riscv/isa.cc | 54 +++++++++++++++++-- src/arch/riscv/isa.hh | 2 + src/arch/riscv/isa/bitfields.isa | 24 +++++++++ src/arch/riscv/isa/includes.isa | 1 + src/arch/riscv/isa/operands.isa | 15 +++++- src/arch/riscv/regs/misc.hh | 31 +++++++++-- src/arch/riscv/regs/vector.hh | 90 ++++++++++++++++++++++++++++++++ src/arch/riscv/utility.hh | 10 +++- 9 files changed, 219 insertions(+), 10 deletions(-) create mode 100644 src/arch/riscv/regs/vector.hh diff --git a/src/arch/riscv/faults.hh b/src/arch/riscv/faults.hh index f687fd6f20..fa67e3b34c 100644 --- a/src/arch/riscv/faults.hh +++ b/src/arch/riscv/faults.hh @@ -173,7 +173,7 @@ class InstFault : public RiscvFault : RiscvFault(n, FaultType::OTHERS, INST_ILLEGAL), _inst(inst) {} - RegVal trap_value() const override { return bits(_inst, 31, 0); } + RegVal trap_value() const override { return _inst.instBits; } }; class UnknownInstFault : public InstFault diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 94a8239bac..2f9d52e1b2 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -43,6 +43,7 @@ #include "arch/riscv/regs/float.hh" #include "arch/riscv/regs/int.hh" #include "arch/riscv/regs/misc.hh" +#include "arch/riscv/regs/vector.hh" #include "base/bitfield.hh" #include "base/compiler.hh" #include "base/logging.hh" @@ -52,6 +53,7 @@ #include "debug/LLSC.hh" #include "debug/MatRegs.hh" #include "debug/RiscvMisc.hh" +#include "debug/VecRegs.hh" #include "mem/packet.hh" #include "mem/request.hh" #include "params/RiscvISA.hh" @@ -189,6 +191,14 @@ namespace RiscvISA [MISCREG_FFLAGS] = "FFLAGS", [MISCREG_FRM] = "FRM", + [MISCREG_VSTART] = "VSTART", + [MISCREG_VXSAT] = "VXSAT", + [MISCREG_VXRM] = "VXRM", + [MISCREG_VCSR] = "VCSR", + [MISCREG_VL] = "VL", + [MISCREG_VTYPE] = "VTYPE", + [MISCREG_VLENB] = "VLENB", + [MISCREG_NMIVEC] = "NMIVEC", [MISCREG_NMIE] = "NMIE", [MISCREG_NMIP] = "NMIP", @@ -234,11 +244,10 @@ namespace { /* Not applicable to RISCV */ -RegClass vecRegClass(VecRegClass, VecRegClassName, 1, debug::IntRegs); -RegClass vecElemClass(VecElemClass, VecElemClassName, 2, debug::IntRegs); -RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 1, +RegClass vecElemClass(VecElemClass, VecElemClassName, 0, debug::IntRegs); +RegClass vecPredRegClass(VecPredRegClass, VecPredRegClassName, 0, debug::IntRegs); -RegClass matRegClass(MatRegClass, MatRegClassName, 1, debug::MatRegs); +RegClass matRegClass(MatRegClass, MatRegClassName, 0, debug::MatRegs); RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace @@ -275,6 +284,13 @@ ISA::copyRegsFrom(ThreadContext *src) for (auto &id: floatRegClass) tc->setReg(id, src->getReg(id)); + // Third loop through the vector registers. + RiscvISA::VecRegContainer vc; + for (auto &id: vecRegClass) { + src->getReg(id, &vc); + tc->setReg(id, &vc); + } + // Lastly copy PC/NPC tc->pcState(src->pcState()); } @@ -299,6 +315,7 @@ void ISA::clear() // mark FS is initial status.fs = INITIAL; + // rv_type dependent init. switch (rv_type) { case RV32: @@ -307,6 +324,8 @@ void ISA::clear() case RV64: misa.rv64_mxl = 2; status.uxl = status.sxl = 2; + status.vs = VPUStatus::INITIAL; + misa.rvv = 1; break; default: panic("%s: Unknown rv_type: %d", name(), (int)rv_type); @@ -479,6 +498,17 @@ ISA::readMiscReg(RegIndex idx) return readMiscRegNoEffect(idx); } + case MISCREG_VLENB: + { + return VLENB; + } + break; + case MISCREG_VCSR: + { + return readMiscRegNoEffect(MISCREG_VXSAT) & + (readMiscRegNoEffect(MISCREG_VXRM) << 1); + } + break; default: // Try reading HPM counters // As a placeholder, all HPM counters are just cycle counters @@ -652,6 +682,22 @@ ISA::setMiscReg(RegIndex idx, RegVal val) setMiscRegNoEffect(idx, val); } break; + case MISCREG_VXSAT: + { + setMiscRegNoEffect(idx, val & 0x1); + } + break; + case MISCREG_VXRM: + { + setMiscRegNoEffect(idx, val & 0x3); + } + break; + case MISCREG_VCSR: + { + setMiscRegNoEffect(MISCREG_VXSAT, val & 0x1); + setMiscRegNoEffect(MISCREG_VXRM, (val & 0x6) >> 1); + } + break; default: setMiscRegNoEffect(idx, val); } diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index 31001c04b4..d7b0a21a1f 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -67,6 +67,8 @@ enum FPUStatus DIRTY = 3, }; +using VPUStatus = FPUStatus; + class ISA : public BaseISA { protected: diff --git a/src/arch/riscv/isa/bitfields.isa b/src/arch/riscv/isa/bitfields.isa index 8589269949..280bcbab22 100644 --- a/src/arch/riscv/isa/bitfields.isa +++ b/src/arch/riscv/isa/bitfields.isa @@ -133,3 +133,27 @@ def bitfield BIT25 <25>; def bitfield RNUM <23:20>; def bitfield KFUNCT5 <29:25>; def bitfield BS <31:30>; + +// Vector instructions +def bitfield VFUNCT6 vfunct6; +def bitfield VFUNCT5 vfunct5; +def bitfield VFUNCT3 vfunct3; +def bitfield VFUNCT2 vfunct2; + +def bitfield VS3 vs3; +def bitfield VS2 vs2; +def bitfield VS1 vs1; +def bitfield VD vd; + +def bitfield NF nf; +def bitfield MEW mew; +def bitfield MOP mop; +def bitfield VM vm; +def bitfield LUMOP lumop; +def bitfield SUMOP sumop; +def bitfield WIDTH width; + +def bitfield BIT31 bit31; +def bitfield BIT30 bit30; +def bitfield SIMM5 uimm_vsetivli; +def bitfield SIMM3 simm3; diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index 8dddc2fb59..cb95f58f7e 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -95,6 +95,7 @@ output exec {{ #include "arch/riscv/reg_abi.hh" #include "arch/riscv/regs/float.hh" #include "arch/riscv/regs/misc.hh" +#include "arch/riscv/regs/vector.hh" #include "arch/riscv/utility.hh" #include "base/condcodes.hh" #include "cpu/base.hh" diff --git a/src/arch/riscv/isa/operands.isa b/src/arch/riscv/isa/operands.isa index 72d8f81bca..a81b28df57 100644 --- a/src/arch/riscv/isa/operands.isa +++ b/src/arch/riscv/isa/operands.isa @@ -38,7 +38,15 @@ def operand_types {{ 'sd' : 'int64_t', 'ud' : 'uint64_t', 'sf' : 'float', - 'df' : 'double' + 'df' : 'double', + + 'vi' : 'vi', + 'vu' : 'vu', + 'vwi' : 'vwi', + 'vwu' : 'vwu', + 'vext' : 'vext', + 'vextu' : 'vextu', + 'vc' : 'RiscvISA::VecRegContainer' }}; let {{ @@ -79,6 +87,11 @@ def operands {{ 'Fp2': FloatRegOp('df', 'FP2 + 8', 'IsFloating', 2), 'Fp2_bits': FloatRegOp('ud', 'FP2 + 8', 'IsFloating', 2), + 'Vd': VecRegOp('vc', 'VD', 'IsVector', 1), + 'Vs1': VecRegOp('vc', 'VS1', 'IsVector', 2), + 'Vs2': VecRegOp('vc', 'VS2', 'IsVector', 3), + 'Vs3': VecRegOp('vc', 'VS3', 'IsVector', 4), + #Memory Operand 'Mem': MemOp('ud', None, (None, 'IsLoad', 'IsStore'), 5), diff --git a/src/arch/riscv/regs/misc.hh b/src/arch/riscv/regs/misc.hh index 5ea3536141..64072c97e2 100644 --- a/src/arch/riscv/regs/misc.hh +++ b/src/arch/riscv/regs/misc.hh @@ -191,6 +191,14 @@ enum MiscRegIndex MISCREG_FFLAGS, MISCREG_FRM, + MISCREG_VSTART, + MISCREG_VXSAT, + MISCREG_VXRM, + MISCREG_VCSR, + MISCREG_VL, + MISCREG_VTYPE, + MISCREG_VLENB, + // These registers are not in the standard, hence does not exist in the // CSRData map. These are mainly used to provide a minimal implementation // for non-maskable-interrupt in our simple cpu. @@ -476,7 +484,15 @@ enum CSRIndex CSR_TDATA3 = 0x7A3, CSR_DCSR = 0x7B0, CSR_DPC = 0x7B1, - CSR_DSCRATCH = 0x7B2 + CSR_DSCRATCH = 0x7B2, + + CSR_VSTART = 0x008, + CSR_VXSAT = 0x009, + CSR_VXRM = 0x00A, + CSR_VCSR = 0x00F, + CSR_VL = 0xC20, + CSR_VTYPE = 0xC21, + CSR_VLENB = 0xC22 }; struct CSRMetadata @@ -718,7 +734,15 @@ const std::unordered_map CSRData = { {CSR_TDATA3, {"tdata3", MISCREG_TDATA3, rvTypeFlags(RV64, RV32)}}, {CSR_DCSR, {"dcsr", MISCREG_DCSR, rvTypeFlags(RV64, RV32)}}, {CSR_DPC, {"dpc", MISCREG_DPC, rvTypeFlags(RV64, RV32)}}, - {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH, rvTypeFlags(RV64, RV32)}} + {CSR_DSCRATCH, {"dscratch", MISCREG_DSCRATCH, rvTypeFlags(RV64, RV32)}}, + + {CSR_VSTART, {"vstart", MISCREG_VSTART, rvTypeFlags(RV64, RV32)}}, + {CSR_VXSAT, {"vxsat" , MISCREG_VXSAT, rvTypeFlags(RV64, RV32)}}, + {CSR_VXRM, {"vxrm" , MISCREG_VXRM, rvTypeFlags(RV64, RV32)}}, + {CSR_VCSR, {"vcsr" , MISCREG_VCSR, rvTypeFlags(RV64, RV32)}}, + {CSR_VL, {"vl" , MISCREG_VL, rvTypeFlags(RV64, RV32)}}, + {CSR_VTYPE, {"vtype" , MISCREG_VTYPE, rvTypeFlags(RV64, RV32)}}, + {CSR_VLENB, {"VLENB" , MISCREG_VLENB, rvTypeFlags(RV64, RV32)}} }; /** @@ -816,6 +840,7 @@ const off_t SBE_OFFSET[enums::Num_RiscvType] = { const off_t SXL_OFFSET = 34; const off_t UXL_OFFSET = 32; const off_t FS_OFFSET = 13; +const off_t VS_OFFSET = 9; const off_t FRM_OFFSET = 5; const RegVal ISA_MXL_MASKS[enums::Num_RiscvType] = { @@ -853,7 +878,7 @@ const RegVal STATUS_MPRV_MASK = 1ULL << 17; const RegVal STATUS_XS_MASK = 3ULL << 15; const RegVal STATUS_FS_MASK = 3ULL << FS_OFFSET; const RegVal STATUS_MPP_MASK = 3ULL << 11; -const RegVal STATUS_VS_MASK = 3ULL << 9; +const RegVal STATUS_VS_MASK = 3ULL << VS_OFFSET; const RegVal STATUS_SPP_MASK = 1ULL << 8; const RegVal STATUS_MPIE_MASK = 1ULL << 7; const RegVal STATUS_SPIE_MASK = 1ULL << 5; diff --git a/src/arch/riscv/regs/vector.hh b/src/arch/riscv/regs/vector.hh new file mode 100644 index 0000000000..d722c2d03a --- /dev/null +++ b/src/arch/riscv/regs/vector.hh @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022 PLCT Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef __ARCH_RISCV_REGS_VECTOR_HH__ +#define __ARCH_RISCV_REGS_VECTOR_HH__ + +#include +#include +#include + +#include "arch/generic/vec_pred_reg.hh" +#include "arch/generic/vec_reg.hh" +#include "base/bitunion.hh" +#include "cpu/reg_class.hh" +#include "debug/VecRegs.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +constexpr unsigned ELEN = 64; +constexpr unsigned VLEN = 256; +constexpr unsigned VLENB = VLEN / 8; + +using VecRegContainer = gem5::VecRegContainer; +using vreg_t = VecRegContainer; + +const int NumVecStandardRegs = 32; +const int NumVecInternalRegs = 8; // Used by vector uop +const int NumVecRegs = NumVecStandardRegs + NumVecInternalRegs; + +const std::vector VecRegNames = { + "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", + "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", + "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", + "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", + "vtmp0", "vtmp1", "vtmp2", "vtmp3", "vtmp4", "vtmp5", "vtmp6", "vtmp7" +}; + +// vector index +const int VecMemInternalReg0 = NumVecStandardRegs; + +static inline TypedRegClassOps vecRegClassOps; + +inline constexpr RegClass vecRegClass = + RegClass(VecRegClass, VecRegClassName, NumVecRegs, debug::VecRegs). + ops(vecRegClassOps). + regType(); + +BitUnion32(VTYPE) + Bitfield<31> vill; + Bitfield<7, 0> vtype8; + Bitfield<7> vma; + Bitfield<6> vta; + Bitfield<5, 3> vsew; + Bitfield<2, 0> vlmul; +EndBitUnion(VTYPE) + +} // namespace RiscvISA +} // namespace gem5 + +#endif // __ARCH_RISCV_REGS_VECTOR_HH__ diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 5fccc84c79..e0a8494ece 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -51,6 +51,7 @@ #include "arch/riscv/regs/float.hh" #include "arch/riscv/regs/int.hh" +#include "arch/riscv/regs/vector.hh" #include "base/types.hh" #include "cpu/reg_class.hh" #include "cpu/static_inst.hh" @@ -130,7 +131,14 @@ registerName(RegId reg) return str.str(); } return float_reg::RegNames[reg.index()]; - } else { + } else if (reg.is(VecRegClass)) { + if (reg.index() >= NumVecRegs) { + std::stringstream str; + str << "?? (v" << reg.index() << ')'; + return str.str(); + } + return VecRegNames[reg.index()]; + } else { /* It must be an InvalidRegClass, in RISC-V we should treat it as a * zero register for the disassembler to work correctly. */ From e14e066fde5516b7ab3921687248f9b74219bdf0 Mon Sep 17 00:00:00 2001 From: Xuan Hu Date: Tue, 21 Feb 2023 11:48:54 +0800 Subject: [PATCH 04/10] arch-riscv: Add risc-v vector ext v1.0 vset insts support Change-Id: I84363164ca327151101e8a1c3d8441a66338c909 Co-authored-by: Yang Liu Co-authored-by: Fan Yang <1209202421@qq.com> arch-riscv: Add a todo to fix vsetvl stall on decode Change-Id: Iafb129648fba89009345f0c0ad3710f773379bf6 --- src/arch/riscv/decoder.cc | 27 +++++ src/arch/riscv/decoder.hh | 12 +- src/arch/riscv/insts/SConscript | 1 + src/arch/riscv/insts/static_inst.hh | 1 + src/arch/riscv/insts/vector.cc | 126 +++++++++++++++++++++ src/arch/riscv/insts/vector.hh | 88 ++++++++++++++ src/arch/riscv/isa/decoder.isa | 33 ++++++ src/arch/riscv/isa/formats/formats.isa | 1 + src/arch/riscv/isa/formats/vector_conf.isa | 96 ++++++++++++++++ src/arch/riscv/isa/includes.isa | 2 + 10 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 src/arch/riscv/insts/vector.cc create mode 100644 src/arch/riscv/insts/vector.hh create mode 100644 src/arch/riscv/isa/formats/vector_conf.isa diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index 7faa310b1e..ce362ad522 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -42,6 +42,7 @@ void Decoder::reset() { aligned = true; mid = false; + vConfigDone = true; machInst = 0; emi = 0; } @@ -49,6 +50,15 @@ void Decoder::reset() void Decoder::moreBytes(const PCStateBase &pc, Addr fetchPC) { + // TODO: Current vsetvl instructions stall decode. Future fixes should + // enable speculation, and this code will be removed. + if (GEM5_UNLIKELY(!this->vConfigDone)) { + DPRINTF(Decode, "Waiting for vset*vl* to be executed\n"); + instDone = false; + outOfBytes = false; + return; + } + // The MSB of the upper and lower halves of a machine instruction. constexpr size_t max_bit = sizeof(machInst) * 8 - 1; constexpr size_t mid_bit = sizeof(machInst) * 4 - 1; @@ -78,6 +88,14 @@ Decoder::moreBytes(const PCStateBase &pc, Addr fetchPC) instDone = compressed(emi); } } + if (instDone) { + emi.vl = this->machVl; + emi.vtype8 = this->machVtype & 0xff; + emi.vill = this->machVtype.vill; + if (vconf(emi)) { + this->vConfigDone = false; // set true when vconfig inst execute + } + } } StaticInstPtr @@ -116,5 +134,14 @@ Decoder::decode(PCStateBase &_next_pc) return decode(emi, next_pc.instAddr()); } +void +Decoder::setVlAndVtype(uint32_t vl, VTYPE vtype) +{ + this->machVtype = vtype; + this->machVl = vl; + + this->vConfigDone = true; +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index 15cbefe39c..d1d2f3cb0c 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -32,6 +32,7 @@ #include "arch/generic/decode_cache.hh" #include "arch/generic/decoder.hh" +#include "arch/riscv/insts/vector.hh" #include "arch/riscv/types.hh" #include "base/logging.hh" #include "base/types.hh" @@ -53,12 +54,16 @@ class Decoder : public InstDecoder decode_cache::InstMap instMap; bool aligned; bool mid; + bool vConfigDone; protected: //The extended machine instruction being generated ExtMachInst emi; uint32_t machInst; + VTYPE machVtype; + uint32_t machVl; + StaticInstPtr decodeInst(ExtMachInst mach_inst); /// Decode a machine instruction. @@ -74,13 +79,18 @@ class Decoder : public InstDecoder void reset() override; - inline bool compressed(ExtMachInst inst) { return (inst & 0x3) < 0x3; } + inline bool compressed(ExtMachInst inst) { return inst.quadRant < 0x3; } + inline bool vconf(ExtMachInst inst) { + return inst.opcode == 0b1010111u && inst.funct3 == 0b111u; + } //Use this to give data to the decoder. This should be used //when there is control flow. void moreBytes(const PCStateBase &pc, Addr fetchPC) override; StaticInstPtr decode(PCStateBase &nextPC) override; + + void setVlAndVtype(uint32_t vl, VTYPE vtype); }; } // namespace RiscvISA diff --git a/src/arch/riscv/insts/SConscript b/src/arch/riscv/insts/SConscript index 704152c040..2822cf86b4 100644 --- a/src/arch/riscv/insts/SConscript +++ b/src/arch/riscv/insts/SConscript @@ -33,3 +33,4 @@ Source('compressed.cc', tags='riscv isa') Source('mem.cc', tags='riscv isa') Source('standard.cc', tags='riscv isa') Source('static_inst.cc', tags='riscv isa') +Source('vector.cc', tags='riscv isa') diff --git a/src/arch/riscv/insts/static_inst.hh b/src/arch/riscv/insts/static_inst.hh index f835713505..74f9ddb452 100644 --- a/src/arch/riscv/insts/static_inst.hh +++ b/src/arch/riscv/insts/static_inst.hh @@ -33,6 +33,7 @@ #include #include "arch/riscv/pcstate.hh" +#include "arch/riscv/regs/misc.hh" #include "arch/riscv/types.hh" #include "cpu/exec_context.hh" #include "cpu/static_inst.hh" diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc new file mode 100644 index 0000000000..3965a45b26 --- /dev/null +++ b/src/arch/riscv/insts/vector.cc @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022 PLCT Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "arch/riscv/insts/vector.hh" + +#include +#include + +#include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/utility.hh" +#include "cpu/static_inst.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +/** + * This function translates the 3-bit value of vlmul bits to the corresponding + * lmul value as specified in RVV 1.0 spec p11-12 chapter 3.4.2. + * + * I.e., + * vlmul = -3 -> LMUL = 1/8 + * vlmul = -2 -> LMUL = 1/4 + * vlmul = -1 -> LMUL = 1/2 + * vlmul = 0 -> LMUL = 1 + * vlmul = 1 -> LMUL = 2 + * vlmul = 2 -> LMUL = 4 + * vlmul = 3 -> LMUL = 8 + * +**/ +float +getVflmul(uint32_t vlmul_encoding) { + int vlmul = sext<3>(vlmul_encoding & 7); + float vflmul = vlmul >= 0 ? 1 << vlmul : 1.0 / (1 << -vlmul); + return vflmul; +} + +uint32_t +getVlmax(VTYPE vtype, uint32_t vlen) { + uint32_t sew = getSew(vtype.vsew); + // vlmax is defined in RVV 1.0 spec p12 chapter 3.4.2. + uint32_t vlmax = (vlen/sew) * getVflmul(vtype.vlmul); + return vlmax; +} + +std::string +VConfOp::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (bit31 && bit30 == 0) { + ss << registerName(srcRegIdx(0)) << ", " << registerName(srcRegIdx(1)); + } else if (bit31 && bit30) { + ss << uimm << ", " << generateZimmDisassembly(); + } else { + ss << registerName(srcRegIdx(0)) << ", " << generateZimmDisassembly(); + } + return ss.str(); +} + +std::string +VConfOp::generateZimmDisassembly() const +{ + std::stringstream s; + + // VSETIVLI uses ZIMM10 and VSETVLI uses ZIMM11 + uint64_t zimm = (bit31 && bit30) ? zimm10 : zimm11; + + bool frac_lmul = bits(zimm, 2); + int sew = 1 << (bits(zimm, 5, 3) + 3); + int lmul = bits(zimm, 1, 0); + auto vta = bits(zimm, 6) == 1 ? "ta" : "tu"; + auto vma = bits(zimm, 7) == 1 ? "ma" : "mu"; + s << "e" << sew; + if (frac_lmul) { + std::string lmul_str = ""; + switch(lmul){ + case 3: + lmul_str = "f2"; + break; + case 2: + lmul_str = "f4"; + break; + case 1: + lmul_str = "f8"; + break; + default: + panic("Unsupport fractional LMUL"); + } + s << ", m" << lmul_str; + } else { + s << ", m" << (1 << lmul); + } + s << ", " << vta << ", " << vma; + return s.str(); +} + +} // namespace RiscvISA +} // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh new file mode 100644 index 0000000000..cdeb48360c --- /dev/null +++ b/src/arch/riscv/insts/vector.hh @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2022 PLCT Lab + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __ARCH_RISCV_INSTS_VECTOR_HH__ +#define __ARCH_RISCV_INSTS_VECTOR_HH__ + +#include + +#include "arch/riscv/insts/static_inst.hh" +#include "arch/riscv/regs/misc.hh" +#include "arch/riscv/regs/vector.hh" +#include "arch/riscv/utility.hh" +#include "cpu/exec_context.hh" +#include "cpu/static_inst.hh" + +namespace gem5 +{ + +namespace RiscvISA +{ + +float +getVflmul(uint32_t vlmul_encoding); + +inline uint32_t getSew(uint32_t vsew) { + assert(vsew <= 3); + return (8 << vsew); +} + +uint32_t +getVlmax(VTYPE vtype, uint32_t vlen); + +/** + * Base class for Vector Config operations + */ +class VConfOp : public RiscvStaticInst +{ + protected: + uint64_t bit30; + uint64_t bit31; + uint64_t zimm10; + uint64_t zimm11; + uint64_t uimm; + VConfOp(const char *mnem, ExtMachInst _extMachInst, OpClass __opClass) + : RiscvStaticInst(mnem, _extMachInst, __opClass), + bit30(_extMachInst.bit30), bit31(_extMachInst.bit31), + zimm10(_extMachInst.zimm_vsetivli), + zimm11(_extMachInst.zimm_vsetvli), + uimm(_extMachInst.uimm_vsetivli) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; + + std::string generateZimmDisassembly() const; +}; + + +} // namespace RiscvISA +} // namespace gem5 + + +#endif // __ARCH_RISCV_INSTS_VECTOR_HH__ diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index a339c11375..2e5b52a879 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -2012,6 +2012,39 @@ decode QUADRANT default Unknown::unknown() { } } + 0x15: decode FUNCT3 { + 0x7: decode BIT31 { + format VConfOp { + 0x0: vsetvli({{ + uint64_t rd_bits = RD; + uint64_t rs1_bits = RS1; + uint64_t requested_vl = Rs1_ud; + uint64_t requested_vtype = zimm11; + + Rd_ud = 0; + }}, VectorConfigOp, IsDirectControl, IsCondControl); + 0x1: decode BIT30 { + 0x0: vsetvl({{ + uint64_t rd_bits = RD; + uint64_t rs1_bits = RS1; + uint64_t requested_vl = Rs1_ud; + uint64_t requested_vtype = Rs2_ud; + + Rd_ud = 0; + }}, VectorConfigOp, IsDirectControl, IsCondControl); + 0x1: vsetivli({{ + uint64_t rd_bits = RD; + uint64_t rs1_bits = -1; + uint64_t requested_vl = uimm; + uint64_t requested_vtype = zimm10; + + Rd_ud = 0; + }}, VectorConfigOp, IsDirectControl, IsCondControl); + } + } + } + } + 0x18: decode FUNCT3 { format BOp { 0x0: beq({{ diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 19749438a8..0f7c94da9a 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -37,6 +37,7 @@ ##include "fp.isa" ##include "amo.isa" ##include "bs.isa" +##include "vector_conf.isa" // Include formats for nonstandard extensions ##include "compressed.isa" diff --git a/src/arch/riscv/isa/formats/vector_conf.isa b/src/arch/riscv/isa/formats/vector_conf.isa new file mode 100644 index 0000000000..556e230075 --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_conf.isa @@ -0,0 +1,96 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +def format VConfOp(code, *flags) {{ + iop = InstObjParams(name, Name, 'VConfOp', code, flags) + header_output = BasicDeclare.subst(iop) + decoder_output = BasicConstructor.subst(iop) + decode_block = BasicDecode.subst(iop) + exec_output = VConfExecute.subst(iop) +}}; + +def template VConfExecute {{ + Fault + %(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData) const + { + auto tc = xc->tcBase(); + + %(op_decl)s; + %(op_rd)s; + %(code)s; + + tc->setMiscReg(MISCREG_VSTART, 0); + + uint32_t vlen = xc->readMiscReg(MISCREG_VLENB) * 8; + uint32_t vlmax = getVlmax(xc->readMiscReg(MISCREG_VTYPE), vlen); + + VTYPE new_vtype = requested_vtype; + if (xc->readMiscReg(MISCREG_VTYPE) != new_vtype) { + vlmax = getVlmax(new_vtype, vlen); + + float vflmul = getVflmul(new_vtype.vlmul); + + uint32_t sew = getSew(new_vtype.vsew); + + uint32_t new_vill = + !(vflmul >= 0.125 && vflmul <= 8) || + sew > std::min(vflmul, 1.0f) * ELEN || + bits(requested_vtype, 30, 8) != 0; + if (new_vill) { + vlmax = 0; + new_vtype = 0; + new_vtype.vill = 1; + } + + xc->setMiscReg(MISCREG_VTYPE, new_vtype); + } + + uint32_t current_vl = xc->readMiscReg(MISCREG_VL); + uint32_t new_vl = 0; + if (vlmax == 0) { + new_vl = 0; + } else if (rd_bits == 0 && rs1_bits == 0) { + new_vl = current_vl > vlmax ? vlmax : current_vl; + } else if (rd_bits != 0 && rs1_bits == 0) { + new_vl = vlmax; + } else if (rs1_bits != 0) { + new_vl = requested_vl > vlmax ? vlmax : requested_vl; + } + + xc->setMiscReg(MISCREG_VL, new_vl); + + tc->getDecoderPtr()->as().setVlAndVtype(new_vl, new_vtype); + + Rd = new_vl; + + %(op_wb)s; + return NoFault; + } +}}; diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index cb95f58f7e..1d544f40ed 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -34,6 +34,7 @@ // output header {{ +#include #include #include #include @@ -45,6 +46,7 @@ output header {{ #include #include +#include "arch/riscv/decoder.hh" #include "arch/riscv/insts/amo.hh" #include "arch/riscv/insts/bs.hh" #include "arch/riscv/insts/compressed.hh" From 91b1d50f59b558a90eb529e8c8135d3a6d774464 Mon Sep 17 00:00:00 2001 From: Xuan Hu Date: Tue, 21 Feb 2023 12:58:30 +0800 Subject: [PATCH 05/10] arch-riscv: Add risc-v vector ext v1.0 mem insts support * TODOs: + Vector Segment Load/Store + Vector Fault-only-first Load Change-Id: I2815c76404e62babab7e9466e4ea33ea87e66e75 Co-authored-by: Yang Liu Co-authored-by: Fan Yang <1209202421@qq.com> Co-authored-by: Jerin Joy --- src/arch/riscv/insts/vector.cc | 173 +++ src/arch/riscv/insts/vector.hh | 341 +++++ src/arch/riscv/isa/decoder.isa | 268 ++++ src/arch/riscv/isa/formats/formats.isa | 1 + src/arch/riscv/isa/formats/vector_mem.isa | 205 +++ src/arch/riscv/isa/includes.isa | 8 + src/arch/riscv/isa/main.isa | 3 + src/arch/riscv/isa/templates/templates.isa | 2 + src/arch/riscv/isa/templates/vector_mem.isa | 1349 +++++++++++++++++++ src/arch/riscv/utility.hh | 55 + 10 files changed, 2405 insertions(+) create mode 100644 src/arch/riscv/isa/formats/vector_mem.isa create mode 100644 src/arch/riscv/isa/templates/templates.isa create mode 100644 src/arch/riscv/isa/templates/vector_mem.isa diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index 3965a45b26..f2bde629e9 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -122,5 +122,178 @@ VConfOp::generateZimmDisassembly() const return s.str(); } +std::string VleMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')' << ", " + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VseMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " + << VLENB * microIdx << '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VleMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VseMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(1)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')'; + return ss.str(); +} + +std::string VlStrideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", " << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlStrideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", "<< registerName(srcRegIdx(1)); + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsStrideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", " << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsStrideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " << + '(' << registerName(srcRegIdx(0)) << ')' << + ", "<< registerName(srcRegIdx(1)); + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlIndexMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << '(' << registerName(srcRegIdx(0)) << ")," + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VlIndexMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' + << registerName(destRegIdx(0)) << "[" << uint16_t(vdElemIdx) << "], " + << '(' << registerName(srcRegIdx(0)) << "), " + << registerName(srcRegIdx(1)) << "[" << uint16_t(vs2ElemIdx) << "]"; + if (microIdx != 0 || machInst.vtype8.vma == 0 || machInst.vtype8.vta == 0) + ss << ", " << registerName(srcRegIdx(2)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsIndexMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(srcRegIdx(2)) << ", " + << '(' << registerName(srcRegIdx(0)) << ")," + << registerName(srcRegIdx(1)); + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + +std::string VsIndexMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' + << registerName(srcRegIdx(2)) << "[" << uint16_t(vs3ElemIdx) << "], " + << '(' << registerName(srcRegIdx(0)) << "), " + << registerName(srcRegIdx(1)) << "[" << uint16_t(vs2ElemIdx) << "]"; + if (!machInst.vm) ss << ", v0.t"; + return ss.str(); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index cdeb48360c..f989d7ffbf 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -80,6 +80,347 @@ class VConfOp : public RiscvStaticInst std::string generateZimmDisassembly() const; }; +inline uint8_t checked_vtype(bool vill, uint8_t vtype) { + panic_if(vill, "vill has been set"); + const uint8_t vsew = bits(vtype, 5, 3); + panic_if(vsew >= 0b100, "vsew: %#x not supported", vsew); + const uint8_t vlmul = bits(vtype, 2, 0); + panic_if(vlmul == 0b100, "vlmul: %#x not supported", vlmul); + return vtype; +} + +class VectorMacroInst : public RiscvMacroInst +{ + protected: + uint32_t vl; + uint8_t vtype; + VectorMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : RiscvMacroInst(mnem, _machInst, __opClass), + vl(_machInst.vl), + vtype(checked_vtype(_machInst.vill, _machInst.vtype8)) + { + this->flags[IsVector] = true; + } +}; + +class VectorMicroInst : public RiscvMicroInst +{ +protected: + uint8_t microVl; + uint8_t microIdx; + uint8_t vtype; + VectorMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : RiscvMicroInst(mnem, _machInst, __opClass), + microVl(_microVl), + microIdx(_microIdx), + vtype(_machInst.vtype8) + { + this->flags[IsVector] = true; + } +}; + +class VectorNopMicroInst : public RiscvMicroInst +{ +public: + VectorNopMicroInst(ExtMachInst _machInst) + : RiscvMicroInst("vnop", _machInst, No_OpClass) + {} + + Fault execute(ExecContext* xc, trace::InstRecord* traceData) + const override + { + return NoFault; + } + + std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { + std::stringstream ss; + ss << mnemonic; + return ss.str(); + } +}; + +class VectorArithMicroInst : public VectorMicroInst +{ +protected: + VectorArithMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorArithMacroInst : public VectorMacroInst +{ + protected: + VectorArithMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorMemMicroInst : public VectorMicroInst +{ + protected: + uint32_t offset; // Used to calculate EA. + Request::Flags memAccessFlags; + + VectorMemMicroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx, + uint32_t _offset) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + , offset(_offset) + , memAccessFlags(0) + {} +}; + +class VectorMemMacroInst : public VectorMacroInst +{ + protected: + VectorMemMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + {} +}; + +class VleMacroInst : public VectorMemMacroInst +{ + protected: + VleMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VseMacroInst : public VectorMemMacroInst +{ + protected: + VseMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VleMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VleMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + { + this->flags[IsLoad] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VseMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VseMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, + uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + { + this->flags[IsStore] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlWholeMacroInst : public VectorMemMacroInst +{ + protected: + VlWholeMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlWholeMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VlWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsWholeMacroInst : public VectorMemMacroInst +{ + protected: + VsWholeMacroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsWholeMicroInst : public VectorMicroInst +{ + protected: + Request::Flags memAccessFlags; + + VsWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microIdx, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlStrideMacroInst : public VectorMemMacroInst +{ + protected: + VlStrideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlStrideMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t regIdx; + VlStrideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _regIdx, + uint8_t _microIdx, uint8_t _microVl) + : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, 0) + , regIdx(_regIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsStrideMacroInst : public VectorMemMacroInst +{ + protected: + VsStrideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsStrideMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t regIdx; + VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _regIdx, + uint8_t _microIdx, uint8_t _microVl) + : VectorMemMicroInst(mnem, _machInst, __opClass, _microVl, + _microIdx, 0) + , regIdx(_regIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlIndexMacroInst : public VectorMemMacroInst +{ + protected: + VlIndexMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VlIndexMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t vdRegIdx; + uint8_t vdElemIdx; + uint8_t vs2RegIdx; + uint8_t vs2ElemIdx; + VlIndexMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, + 0, 0) + , vdRegIdx(_vdRegIdx), vdElemIdx(_vdElemIdx) + , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsIndexMacroInst : public VectorMemMacroInst +{ + protected: + VsIndexMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMemMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VsIndexMicroInst : public VectorMemMicroInst +{ + protected: + uint8_t vs3RegIdx; + uint8_t vs3ElemIdx; + uint8_t vs2RegIdx; + uint8_t vs2ElemIdx; + VsIndexMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0) + , vs3RegIdx(_vs3RegIdx), vs3ElemIdx(_vs3ElemIdx) + , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2e5b52a879..0288f37ad8 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -500,6 +500,174 @@ decode QUADRANT default Unknown::unknown() { Fd_bits = fd.v; }}, inst_flags=FloatMemReadOp); } + + 0x0: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle8_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_ub[i] = Mem_vc.as()[i]; + } else { + Vd_ub[i] = Vs2_ub[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re8_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + 0x0b: VlmOp::vlm_v({{ + Vd_ub[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorUnitStrideMaskLoadOp); + } + 0x1: VlIndexOp::vluxei8_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse8_v({{ + Vd_ub[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei8_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x5: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle16_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_uh[i] = Mem_vc.as()[i]; + } else { + Vd_uh[i] = Vs2_uh[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re16_v({{ + Vd_uh[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei16_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse16_v({{ + Vd_uh[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei16_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x6: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle32_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_uw[i] = Mem_vc.as()[i]; + } else { + Vd_uw[i] = Vs2_uw[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re32_v({{ + Vd_uw[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei32_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse32_v({{ + Vd_uw[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei32_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } + 0x7: decode MOP { + 0x0: decode LUMOP { + 0x00: VleOp::vle64_v({{ + if ((machInst.vm || elem_mask(v0, ei)) && + i < this->microVl) { + Vd_ud[i] = Mem_vc.as()[i]; + } else { + Vd_ud[i] = Vs2_ud[i]; + } + }}, inst_flags=VectorUnitStrideLoadOp); + 0x08: decode NF { + format VlWholeOp { + 0x0: vl1re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x1: vl2re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x3: vl4re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + 0x7: vl8re64_v({{ + Vd_ud[i] = Mem_vc.as()[i]; + }}, inst_flags=VectorWholeRegisterLoadOp); + } + } + } + 0x1: VlIndexOp::vluxei64_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + 0x2: VlStrideOp::vlse64_v({{ + Vd_ud[microIdx] = Mem_vc.as()[0]; + }}, inst_flags=VectorStridedLoadOp); + 0x3: VlIndexOp::vloxei64_v({{ + Vd_vu[vdElemIdx] = Mem_vc.as()[0]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedLoadOp); + } } 0x03: decode FUNCT3 { @@ -806,6 +974,106 @@ decode QUADRANT default Unknown::unknown() { Mem_ud = Fs2_bits; }}, inst_flags=FloatMemWriteOp); } + + 0x0: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse8_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + format VsWholeOp { + 0x8: decode NF { + 0x0: vs1r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x1: vs2r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x3: vs4r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + 0x7: vs8r_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorWholeRegisterStoreOp); + } + } + 0x0b: VsmOp::vsm_v({{ + Mem_vc.as()[i] = Vs3_ub[i]; + }}, inst_flags=VectorUnitStrideMaskStoreOp); + } + 0x1: VsIndexOp::vsuxei8_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse8_v({{ + Mem_vc.as()[0] = Vs3_ub[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei8_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ub[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x5: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse16_v({{ + Mem_vc.as()[i] = Vs3_uh[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei16_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse16_v({{ + Mem_vc.as()[0] = Vs3_uh[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei16_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uh[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x6: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse32_v({{ + Mem_vc.as()[i] = Vs3_uw[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei32_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse32_v({{ + Mem_vc.as()[0] = Vs3_uw[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei32_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_uw[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } + 0x7: decode MOP { + 0x0: decode SUMOP { + 0x00: VseOp::vse64_v({{ + Mem_vc.as()[i] = Vs3_ud[i]; + }}, inst_flags=VectorUnitStrideStoreOp); + } + 0x1: VsIndexOp::vsuxei64_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + 0x2: VsStrideOp::vsse64_v({{ + Mem_vc.as()[0] = Vs3_ud[microIdx]; + }}, inst_flags=VectorStridedStoreOp); + 0x3: VsIndexOp::vsoxei64_v({{ + Mem_vc.as()[0] = Vs3_vu[vs3ElemIdx]; + }}, {{ + EA = Rs1 + Vs2_ud[vs2ElemIdx]; + }}, inst_flags=VectorIndexedStoreOp); + } } 0x0b: decode FUNCT3 { diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 0f7c94da9a..4bdc3021d5 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -38,6 +38,7 @@ ##include "amo.isa" ##include "bs.isa" ##include "vector_conf.isa" +##include "vector_mem.isa" // Include formats for nonstandard extensions ##include "compressed.isa" diff --git a/src/arch/riscv/isa/formats/vector_mem.isa b/src/arch/riscv/isa/formats/vector_mem.isa new file mode 100644 index 0000000000..113250d5cf --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_mem.isa @@ -0,0 +1,205 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +let {{ + +def VMemBase(name, Name, ea_code, memacc_code, mem_flags, + inst_flags, base_class, postacc_code='', + declare_template_base=VMemMacroDeclare, + decode_template=BasicDecode, exec_template_base='', + # If it's a macroop, the corresponding microops will be + # generated. + is_macroop=True): + # Make sure flags are in lists (convert to lists if not). + mem_flags = makeList(mem_flags) + inst_flags = makeList(inst_flags) + iop = InstObjParams(name, Name, base_class, + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code }, + inst_flags) + + constructTemplate = eval(exec_template_base + 'Constructor') + + header_output = declare_template_base.subst(iop) + decoder_output = '' + if declare_template_base is not VMemTemplateMacroDeclare: + decoder_output += constructTemplate.subst(iop) + else: + header_output += constructTemplate.subst(iop) + decode_block = decode_template.subst(iop) + exec_output = '' + if not is_macroop: + return (header_output, decoder_output, decode_block, exec_output) + + microiop = InstObjParams(name + '_micro', + Name + 'Micro', + exec_template_base + 'MicroInst', + {'ea_code': ea_code, + 'memacc_code': memacc_code, + 'postacc_code': postacc_code}, + inst_flags) + + if mem_flags: + mem_flags = [ 'Request::%s' % flag for flag in mem_flags ] + s = '\n\tmemAccessFlags = ' + '|'.join(mem_flags) + ';' + microiop.constructor += s + + microDeclTemplate = eval(exec_template_base + 'Micro' + 'Declare') + microExecTemplate = eval(exec_template_base + 'Micro' + 'Execute') + microInitTemplate = eval(exec_template_base + 'Micro' + 'InitiateAcc') + microCompTemplate = eval(exec_template_base + 'Micro' + 'CompleteAcc') + header_output = microDeclTemplate.subst(microiop) + header_output + micro_exec_output = (microExecTemplate.subst(microiop) + + microInitTemplate.subst(microiop) + + microCompTemplate.subst(microiop)) + if declare_template_base is not VMemTemplateMacroDeclare: + exec_output += micro_exec_output + else: + header_output += micro_exec_output + + return (header_output, decoder_output, decode_block, exec_output) + +}}; + +def format VleOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VleMacroInst', exec_template_base='Vle') +}}; + +def format VseOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VseMacroInst', exec_template_base='Vse') +}}; + +def format VlmOp( + memacc_code, + ea_code={{ EA = Rs1; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VleMacroInst', exec_template_base='Vlm', is_macroop=False) +}}; + +def format VsmOp( + memacc_code, + ea_code={{ EA = Rs1; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VseMacroInst', exec_template_base='Vsm', is_macroop=False) +}}; + +def format VlWholeOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlWholeMacroInst', exec_template_base='VlWhole') +}}; + +def format VsWholeOp( + memacc_code, + ea_code={{ EA = Rs1 + VLENB * microIdx; }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsWholeMacroInst', exec_template_base='VsWhole') +}}; + +def format VlStrideOp( + memacc_code, + ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlStrideMacroInst', exec_template_base='VlStride') +}}; + +def format VsStrideOp( + memacc_code, + ea_code={{ EA = Rs1 + Rs2 * (regIdx * VLENB / elem_size + microIdx); }}, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsStrideMacroInst', exec_template_base='VsStride') +}}; + +def format VlIndexOp( + memacc_code, + ea_code, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VlIndexMacroInst', exec_template_base='VlIndex', + declare_template_base=VMemTemplateMacroDeclare, + decode_template=VMemTemplateDecodeBlock + ) +}}; + +def format VsIndexOp( + memacc_code, + ea_code, + mem_flags=[], + inst_flags=[] +) {{ + (header_output, decoder_output, decode_block, exec_output) = \ + VMemBase(name, Name, ea_code, memacc_code, mem_flags, inst_flags, + 'VsIndexMacroInst', exec_template_base='VsIndex', + declare_template_base=VMemTemplateMacroDeclare, + decode_template=VMemTemplateDecodeBlock + ) +}}; diff --git a/src/arch/riscv/isa/includes.isa b/src/arch/riscv/isa/includes.isa index 1d544f40ed..76f2388faf 100644 --- a/src/arch/riscv/isa/includes.isa +++ b/src/arch/riscv/isa/includes.isa @@ -46,6 +46,7 @@ output header {{ #include #include +#include "arch/generic/memhelpers.hh" #include "arch/riscv/decoder.hh" #include "arch/riscv/insts/amo.hh" #include "arch/riscv/insts/bs.hh" @@ -55,6 +56,7 @@ output header {{ #include "arch/riscv/insts/standard.hh" #include "arch/riscv/insts/static_inst.hh" #include "arch/riscv/insts/unknown.hh" +#include "arch/riscv/insts/vector.hh" #include "arch/riscv/interrupts.hh" #include "cpu/static_inst.hh" #include "mem/packet.hh" @@ -68,9 +70,15 @@ output decoder {{ #include #include +/* riscv softfloat library */ +#include +#include +#include + #include "arch/riscv/decoder.hh" #include "arch/riscv/faults.hh" #include "arch/riscv/mmu.hh" +#include "arch/riscv/regs/float.hh" #include "base/cprintf.hh" #include "base/loader/symtab.hh" #include "cpu/thread_context.hh" diff --git a/src/arch/riscv/isa/main.isa b/src/arch/riscv/isa/main.isa index 24f366b00c..2923a965da 100644 --- a/src/arch/riscv/isa/main.isa +++ b/src/arch/riscv/isa/main.isa @@ -50,6 +50,9 @@ namespace RiscvISA; //Include the operand_types and operand definitions ##include "operands.isa" +//Include the definitions for the instruction templates +##include "templates/templates.isa" + //Include the definitions for the instruction formats ##include "formats/formats.isa" diff --git a/src/arch/riscv/isa/templates/templates.isa b/src/arch/riscv/isa/templates/templates.isa new file mode 100644 index 0000000000..b4de46d846 --- /dev/null +++ b/src/arch/riscv/isa/templates/templates.isa @@ -0,0 +1,2 @@ +// Include +##include "vector_mem.isa" diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa new file mode 100644 index 0000000000..d54243ad7d --- /dev/null +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -0,0 +1,1349 @@ +def template VMemMacroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMemTemplateMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VleConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VleMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, + _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; + +}; + +}}; + +def template VleMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = width_EEW(machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); + Fault fault = xc->readMem(EA, Mem.as(), mem_size, memAccessFlags, + byte_enable); + if (fault != NoFault) + return fault; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_elems = VLEN / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { + ei = i + micro_vlmax * microIdx; + %(memacc_code)s; + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VleMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + uint32_t mem_size = width_EEW(this->machInst.width) / 8 * this->microVl; + const std::vector byte_enable(mem_size, true); + Fault fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, + byte_enable); + return fault; +} + +}}; + +def template VleMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t micro_elems = VLEN / width_EEW(machInst.width); + size_t ei; + for (size_t i = 0; i < micro_elems; i++) { + ei = i + micro_vlmax * microIdx; + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VseConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + const uint32_t num_microops = ceil((float) this->vl / (micro_vlmax)); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + this->microops.push_back(microop); + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); +} + +}}; + +def template VseMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsVector] = true; + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VseMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t eewb = width_EEW(machInst.width) / 8; + const size_t mem_size = eewb * microVl; + std::vector byte_enable(mem_size, false); + size_t ei; + for (size_t i = 0; i < microVl; i++) { + ei = i + micro_vlmax * microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + auto it = byte_enable.begin() + i * eewb; + std::fill(it, it + eewb, true); + } + } + + Fault fault; + fault = xc->writeMem(Mem.as(), mem_size, EA, memAccessFlags, + nullptr, byte_enable); + return fault; +} + +}}; + +def template VseMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + const size_t micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const size_t eewb = width_EEW(machInst.width) / 8; + const size_t mem_size = eewb * microVl; + std::vector byte_enable(mem_size, false); + size_t ei; + for (size_t i = 0; i < microVl; i++) { + ei = i + micro_vlmax * microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + auto it = byte_enable.begin() + i * eewb; + std::fill(it, it + eewb, true); + } + } + + Fault fault; + fault = xc->writeMem(Mem.as(), mem_size, EA, memAccessFlags, + nullptr, byte_enable); + return fault; +} + +}}; + +def template VseMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlmConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + } else { + microop = new Vle8_vMicro(_machInst, micro_vl, 0); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + } + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsmConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + int32_t micro_vl = (std::min(this->vl, micro_vlmax) + 7) / 8; + + StaticInstPtr microop; + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + } else { + microop = new Vse8_vMicro(_machInst, micro_vl, 0); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + } + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsWholeConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + size_t NFIELDS = machInst.nf + 1; + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + StaticInstPtr microop; + for (int i = 0; i < NFIELDS; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop->setDelayedCommit(); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VsWholeMicroDeclare {{ + +class %(class_name)s: public %(base_class)s +{ +private: + RegId destRegIdxArr[0]; + RegId srcRegIdxArr[2]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _microIdx]); + this->flags[IsVector] = true; + this->flags[IsStore] = true; + } + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + for (size_t i = 0; i < VLENB; i++) { + %(memacc_code)s; + } + + Fault fault = writeMemAtomicLE(xc, traceData, *(vreg_t::Container*)(&Mem), + EA, memAccessFlags, nullptr); + return fault; +} + +}}; + +def template VsWholeMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + for (size_t i = 0; i < VLENB; i++) { + %(memacc_code)s; + } + + Fault fault = writeMemTimingLE(xc, traceData, *(vreg_t::Container*)(&Mem), + EA, memAccessFlags, nullptr); + return fault; +} + +}}; + +def template VsWholeMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlWholeConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + size_t NFIELDS = machInst.nf + 1; + const int32_t micro_vlmax = VLEN / width_EEW(_machInst.width); + + StaticInstPtr microop; + for (int i = 0; i < NFIELDS; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vlmax, i); + microop->setDelayedCommit(); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VlWholeMicroDeclare {{ + +class %(class_name)s: public %(base_class)s +{ +private: + RegId destRegIdxArr[1]; + RegId srcRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s_micro", _machInst, %(op_class)s, + _microVl, _microIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + this->flags[IsVector] = true; + this->flags[IsLoad] = true; + } + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Addr EA; + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + + Fault fault = readMemAtomicLE(xc, traceData, EA, + *(vreg_t::Container*)(&Mem), memAccessFlags); + if (fault != NoFault) + return fault; + + size_t elem_per_reg = VLEN / width_EEW(machInst.width); + for (size_t i = 0; i < elem_per_reg; i++) { + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VlWholeMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Addr EA; + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + + Fault fault = initiateMemRead(xc, traceData, EA, Mem, memAccessFlags); + return fault; +} + +}}; + +def template VlWholeMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + + size_t elem_per_reg = VLEN / width_EEW(machInst.width); + for (size_t i = 0; i < elem_per_reg; ++i) { + %(memacc_code)s; + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VlStrideConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + int32_t remaining_vl = this->vl; + // Num of elems in one vreg + int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; micro_vl > 0; ++i) { + for (int j = 0; j < micro_vl; ++j) { + microop = new %(class_name)sMicro(machInst, i, j, micro_vl); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + remaining_vl -= num_elems_per_vreg; + micro_vl = std::min(remaining_vl, num_elems_per_vreg); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VlStrideMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, rs2, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint8_t _microVl) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _regIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlStrideMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + fault = xc->readMem(EA, Mem.as(), mem_size, + memAccessFlags, byte_enable); + if (fault != NoFault) + return fault; + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VlStrideMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + bool need_load = machInst.vm || elem_mask(v0, ei); + const std::vector byte_enable(mem_size, need_load); + fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + return fault; +} + +}}; + +def template VlStrideMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const +{ + %(op_decl)s; + %(op_rd)s; + + constexpr uint8_t elem_size = sizeof(Vd[0]); + + RiscvISA::vreg_t old_vd; + decltype(Vd) old_Vd = nullptr; + // We treat agnostic as undistrubed + xc->getRegOperand(this, 2, &old_vd); + old_Vd = old_vd.as >(); + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + if (microIdx == 0) { + // treat vma as vmu + // if (machInst.vtype8.vma == 0) + memcpy(Vd, old_Vd, microVl * elem_size); + // treat vta as vtu + // if (machInst.vtype8.vta == 0) + memcpy(Vd + microVl, old_Vd + microVl, VLENB - microVl * elem_size); + } else { + memcpy(Vd, old_Vd, VLENB); + } + + size_t ei = this->regIdx * VLENB / sizeof(Vd[0]) + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VsStrideConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const int32_t num_elems_per_vreg = VLEN / width_EEW(_machInst.width); + int32_t remaining_vl = this->vl; + // Num of elems in one vreg + int32_t micro_vl = std::min(remaining_vl, num_elems_per_vreg); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; micro_vl > 0; ++i) { + for (int j = 0; j < micro_vl; ++j) { + microop = new %(class_name)sMicro(machInst, i, j, micro_vl); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + remaining_vl -= num_elems_per_vreg; + micro_vl = std::min(remaining_vl, num_elems_per_vreg); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VsStrideMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, rs2, vs3, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _regIdx, uint8_t _microIdx, + uint8_t _microVl) + : %(base_class)s("%(mnemonic)s""_micro", _machInst, %(op_class)s, + _regIdx, _microIdx, _microVl) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs2]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _regIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsStrideMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext *xc, trace::InstRecord *traceData) const +{ + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + %(ea_code)s; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsStrideMicroInitiateAcc {{ + +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + Fault fault = NoFault; + Addr EA; + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs - 1, &tmp_v0); + v0 = tmp_v0.as(); + } + + %(op_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + %(ea_code)s; + + uint32_t mem_size = elem_size; + size_t ei = this->regIdx * VLENB / elem_size + this->microIdx; + bool need_store = machInst.vm || elem_mask(v0, ei); + if (need_store) { + const std::vector byte_enable(mem_size, need_store); + %(memacc_code)s; + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsStrideMicroCompleteAcc {{ + +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VlIndexConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t vd_eewb = sizeof(ElemType); + const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; + const uint8_t vs2_split_num = (vd_eewb + vs2_eewb - 1) / vs2_eewb; + const uint8_t vd_split_num = (vs2_eewb + vd_eewb - 1) / vd_eewb; + const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs2_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; micro_vl > 0; i++) { + for (uint8_t j = 0; j < micro_vl; ++j) { + uint8_t vdRegIdx = i / vd_split_num; + uint8_t vs2RegIdx = i / vs2_split_num; + uint8_t vdElemIdx = j + micro_vlmax * (i % vd_split_num); + uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num); + microop = new %(class_name)sMicro(machInst, + vdRegIdx, vdElemIdx, vs2RegIdx, vs2ElemIdx); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsLoad); + this->microops.push_back(microop); + } + remaining_vl -= micro_vlmax; + micro_vl = std::min(remaining_vl, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VlIndexMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, vs2, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _vdRegIdx, uint8_t _vdElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vdRegIdx, _vdElemIdx, _vs2RegIdx, _vs2ElemIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vd + _vdRegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsLoad] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VlIndexMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData)const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + fault = xc->readMem(EA, Mem.as(), mem_size, + memAccessFlags, byte_enable); + if (fault != NoFault) + return fault; + %(memacc_code)s; /* Vd[this->vdElemIdx] = Mem[0]; */ + } + + %(op_wb)s; + return fault; +} + +}}; + +def template VlIndexMicroInitiateAcc {{ + +template +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + constexpr uint8_t elem_size = sizeof(Vd[0]); + %(ea_code)s; // ea_code depends on elem_size + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + bool need_load = machInst.vm || elem_mask(v0, ei); + const std::vector byte_enable(mem_size, need_load); + fault = initiateMemRead(xc, EA, mem_size, memAccessFlags, byte_enable); + return fault; +} + +}}; + +def template VlIndexMicroCompleteAcc {{ + +template +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + trace::InstRecord *traceData) const +{ + using vu = std::make_unsigned_t; + %(op_decl)s; + %(op_rd)s; + + constexpr uint8_t elem_size = sizeof(Vd[0]); + + RiscvISA::vreg_t old_vd; + decltype(Vd) old_Vd = nullptr; + // We treat agnostic as undistrubed + xc->getRegOperand(this, 2, &old_vd); + old_Vd = old_vd.as >(); + + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + memcpy(Vd, old_Vd, VLENB); + + size_t ei = this->vdRegIdx * VLENB / elem_size + this->vdElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + memcpy(Mem.as(), pkt->getPtr(), pkt->getSize()); + %(memacc_code)s; /* Vd[this->microIdx] = Mem[0]; */ + } + + %(op_wb)s; + return NoFault; +} + +}}; + +def template VsIndexConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + + const uint32_t vs3_eewb = sizeof(ElemType); + const uint32_t vs2_eewb = width_EEW(_machInst.width) / 8; + const uint8_t vs2_split_num = (vs3_eewb + vs2_eewb - 1) / vs2_eewb; + const uint8_t vs3_split_num = (vs2_eewb + vs3_eewb - 1) / vs3_eewb; + const int32_t micro_vlmax = VLENB / std::max(vs3_eewb, vs2_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; micro_vl > 0; i++) { + for (uint8_t j = 0; j < micro_vl; ++j) { + uint8_t vs3RegIdx = i / vs3_split_num; + uint8_t vs2RegIdx = i / vs2_split_num; + uint8_t vs3ElemIdx = j + micro_vlmax * (i % vs3_split_num); + uint8_t vs2ElemIdx = j + micro_vlmax * (i % vs2_split_num); + microop = new %(class_name)sMicro(machInst, + vs3RegIdx, vs3ElemIdx, vs2RegIdx, vs2ElemIdx); + microop->setFlag(IsDelayedCommit); + microop->setFlag(IsStore); + this->microops.push_back(microop); + } + remaining_vl -= micro_vlmax; + micro_vl = std::min(remaining_vl, micro_vlmax); + } + + this->microops.front()->setFlag(IsFirstMicroop); + this->microops.back()->setFlag(IsLastMicroop); + this->flags[IsVector] = true; +} + +}}; + +def template VsIndexMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // rs1, vs2, vs3, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[0]; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, + uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, + _vs3RegIdx, _vs3ElemIdx, _vs2RegIdx, _vs2ElemIdx) + { + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setSrcRegIdx(_numSrcRegs++, intRegClass[_machInst.rs1]); + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _vs2RegIdx]); + // We treat agnostic as undistrubed + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs3 + _vs3RegIdx]); + if (!_machInst.vm) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[0]); + } + this->flags[IsStore] = true; + } + + Fault execute(ExecContext *, trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + trace::InstRecord *) const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VsIndexMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext *xc, + trace::InstRecord *traceData)const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + uint32_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsIndexMicroInitiateAcc {{ + +template +Fault +%(class_name)s::initiateAcc(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = std::make_unsigned_t; + Fault fault = NoFault; + Addr EA; + + %(op_src_decl)s; + %(op_rd)s; + %(ea_code)s; + constexpr uint8_t elem_size = sizeof(Vs3[0]); + RiscvISA::vreg_t tmp_v0; + uint8_t *v0; + if (!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + + constexpr uint8_t mem_size = elem_size; + const std::vector byte_enable(mem_size, true); + + size_t ei = this->vs3RegIdx * VLENB / elem_size + this->vs3ElemIdx; + if (machInst.vm || elem_mask(v0, ei)) { + %(memacc_code)s; /* Mem[0] = Vs3[this->vs3ElemIdx] */ + fault = xc->writeMem(Mem.as(), mem_size, EA, + memAccessFlags, nullptr, byte_enable); + } + return fault; +} + +}}; + +def template VsIndexMicroCompleteAcc {{ + +template +Fault +%(class_name)s::completeAcc(PacketPtr pkt, ExecContext* xc, + trace::InstRecord* traceData) const +{ + return NoFault; +} + +}}; + +def template VMemTemplateDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + return new %(class_name)s(machInst); + } + case 0b001: { + return new %(class_name)s(machInst); + } + case 0b010: { + return new %(class_name)s(machInst); + } + case 0b011: { + return new %(class_name)s(machInst); + } + default: GEM5_UNREACHABLE; +} + +}}; diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index e0a8494ece..1db6d6df3b 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -241,6 +241,61 @@ remu(T rs1, T rs2) return (rs2 == 0) ? rs1 : rs1 % rs2; } +/* +* Encode LMUL to lmul as follows: +* LMUL vlmul lmul +* 1 000 0 +* 2 001 1 +* 4 010 2 +* 8 011 3 +* - 100 - +* 1/8 101 -3 +* 1/4 110 -2 +* 1/2 111 -1 +* +* then, we can calculate VLMAX = vlen >> (vsew + 3 - lmul) +* e.g. vlen = 256 bits, SEW = 16, LMUL = 1/8 +* => VLMAX = vlen >> (1 + 3 - (-3)) +* = 256 >> 7 +* = 2 +* Ref: https://github.com/qemu/qemu/blob/5e9d14f2/target/riscv/cpu.h +*/ +inline uint64_t +vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) +{ + int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); + lmul = per_reg ? std::min(0, lmul) : lmul; + int64_t vsew = bits(vtype, 5, 3); + return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); +} + +inline uint64_t +width_EEW(uint64_t width) +{ + switch (width) { + case 0b000: return 8; + case 0b101: return 16; + case 0b110: return 32; + case 0b111: return 64; + default: GEM5_UNREACHABLE; + } +} + +/* + * Spec Section 4.5 + * Ref: + * https://github.com/qemu/qemu/blob/c7d773ae/target/riscv/vector_helper.c +*/ +template +inline int +elem_mask(const T* vs, const int index) +{ + static_assert(std::is_integral_v); + int idx = index / (sizeof(T)*8); + int pos = index % (sizeof(T)*8); + return (vs[idx] >> pos) & 1; +} + } // namespace RiscvISA } // namespace gem5 From a9f9c4d6d3850308697632b585c5c4bfad9fe7b6 Mon Sep 17 00:00:00 2001 From: Xuan Hu Date: Mon, 27 Feb 2023 21:31:22 +0800 Subject: [PATCH 06/10] arch-riscv: Add risc-v vector ext v1.0 arith insts support TODOs: + vcompress.vm Change-Id: I86eceae66e90380416fd3be2c10ad616512b5eba Co-authored-by: Yang Liu Co-authored-by: Fan Yang <1209202421@qq.com> Co-authored-by: Jerin Joy arch-riscv: Add LICENCE to template files Change-Id: I825e72bffb84cce559d2e4c1fc2246c3b05a1243 --- src/arch/riscv/insts/vector.cc | 107 + src/arch/riscv/insts/vector.hh | 200 ++ src/arch/riscv/isa/decoder.isa | 2054 +++++++++++++++++ src/arch/riscv/isa/formats/formats.isa | 1 + src/arch/riscv/isa/formats/vector_arith.isa | 1319 +++++++++++ src/arch/riscv/isa/templates/templates.isa | 30 + src/arch/riscv/isa/templates/vector_arith.isa | 1989 ++++++++++++++++ src/arch/riscv/isa/templates/vector_mem.isa | 28 + src/arch/riscv/regs/float.hh | 14 + src/arch/riscv/utility.hh | 481 ++++ 10 files changed, 6223 insertions(+) create mode 100644 src/arch/riscv/isa/formats/vector_arith.isa create mode 100644 src/arch/riscv/isa/templates/vector_arith.isa diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index f2bde629e9..a1ccf402c9 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -122,6 +122,93 @@ VConfOp::generateZimmDisassembly() const return s.str(); } +std::string +VectorNonSplitInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorArithMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + // OPIVI + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorArithMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + // OPIVI + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorVMUNARY0MicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorVMUNARY0MacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorSlideMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + +std::string VectorSlideMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", "; + if (machInst.funct3 == 0x3) { + ss << registerName(srcRegIdx(0)) << ", " << machInst.vecimm; + } else { + ss << registerName(srcRegIdx(1)) << ", " << registerName(srcRegIdx(0)); + } + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); +} + std::string VleMicroInst::generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const { @@ -295,5 +382,25 @@ std::string VsIndexMicroInst::generateDisassembly(Addr pc, return ss.str(); } +std::string +VMvWholeMacroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + registerName(srcRegIdx(1)); + return ss.str(); +} + +std::string +VMvWholeMicroInst::generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const +{ + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " << + registerName(srcRegIdx(1)); + return ss.str(); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index f989d7ffbf..5d0874a994 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -89,6 +89,24 @@ inline uint8_t checked_vtype(bool vill, uint8_t vtype) { return vtype; } +class VectorNonSplitInst : public RiscvStaticInst +{ + protected: + uint32_t vl; + uint8_t vtype; + VectorNonSplitInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : RiscvStaticInst(mnem, _machInst, __opClass), + vl(_machInst.vl), + vtype(checked_vtype(_machInst.vill, _machInst.vtype8)) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + class VectorMacroInst : public RiscvMacroInst { protected: @@ -170,6 +188,63 @@ class VectorArithMacroInst : public VectorMacroInst Addr pc, const loader::SymbolTable *symtab) const override; }; +class VectorVMUNARY0MicroInst : public VectorMicroInst +{ +protected: + VectorVMUNARY0MicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorVMUNARY0MacroInst : public VectorMacroInst +{ + protected: + VectorVMUNARY0MacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorSlideMacroInst : public VectorMacroInst +{ + protected: + VectorSlideMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorMacroInst(mnem, _machInst, __opClass) + { + this->flags[IsVector] = true; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VectorSlideMicroInst : public VectorMicroInst +{ + protected: + uint8_t vdIdx; + uint8_t vs2Idx; + VectorSlideMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + : VectorMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + , vdIdx(_vdIdx), vs2Idx(_vs2Idx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + class VectorMemMicroInst : public VectorMicroInst { protected: @@ -421,6 +496,131 @@ class VsIndexMicroInst : public VectorMemMicroInst Addr pc, const loader::SymbolTable *symtab) const override; }; +class VMvWholeMacroInst : public VectorArithMacroInst +{ + protected: + VMvWholeMacroInst(const char* mnem, ExtMachInst _machInst, + OpClass __opClass) + : VectorArithMacroInst(mnem, _machInst, __opClass) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +class VMvWholeMicroInst : public VectorArithMicroInst +{ + protected: + VMvWholeMicroInst(const char *mnem, ExtMachInst _machInst, + OpClass __opClass, uint8_t _microVl, + uint8_t _microIdx) + : VectorArithMicroInst(mnem, _machInst, __opClass, _microVl, _microIdx) + {} + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override; +}; + +template +class VMaskMergeMicroInst : public VectorArithMicroInst +{ + private: + RegId srcRegIdxArr[NumVecInternalRegs]; + RegId destRegIdxArr[1]; + + public: + VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, + uint8_t _numSrcs) + : VectorArithMicroInst("vmask_mv_micro", extMachInst, + VectorIntegerArithOp, 0, 0) + { + setRegIdxArrays( + reinterpret_cast( + &std::remove_pointer_t::srcRegIdxArr), + reinterpret_cast( + &std::remove_pointer_t::destRegIdxArr)); + + _numSrcRegs = 0; + _numDestRegs = 0; + + setDestRegIdx(_numDestRegs++, vecRegClass[_dstReg]); + _numTypedDestRegs[VecRegClass]++; + for (uint8_t i=0; i<_numSrcs; i++) { + setSrcRegIdx(_numSrcRegs++, vecRegClass[VecMemInternalReg0 + i]); + } + } + + Fault execute(ExecContext* xc, trace::InstRecord* traceData) + const override { + vreg_t tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); + auto Vd = tmp_d0.as(); + constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); + size_t bit_cnt = elems_per_vreg; + vreg_t tmp_s; + xc->getRegOperand(this, 0, &tmp_s); + auto s = tmp_s.as(); + // cp the first result and tail + memcpy(Vd, s, VLENB); + for (uint8_t i = 1; i < this->_numSrcRegs; i++) { + xc->getRegOperand(this, i, &tmp_s); + s = tmp_s.as(); + if constexpr (elems_per_vreg < 8) { + constexpr uint8_t m = (1 << elems_per_vreg) - 1; + const uint8_t mask = m << (i * elems_per_vreg % 8); + // clr & ext bits + Vd[bit_cnt/8] ^= Vd[bit_cnt/8] & mask; + Vd[bit_cnt/8] |= s[bit_cnt/8] & mask; + bit_cnt += elems_per_vreg; + } else { + constexpr uint8_t byte_offset = elems_per_vreg / 8; + memcpy(Vd + i * byte_offset, s + i * byte_offset, byte_offset); + } + } + xc->setRegOperand(this, 0, &tmp_d0); + if (traceData) + traceData->setData(vecRegClass, &tmp_d0); + return NoFault; + } + + std::string generateDisassembly( + Addr pc, const loader::SymbolTable *symtab) const override { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)); + for (uint8_t i = 0; i < this->_numSrcRegs; i++) { + ss << ", " << registerName(srcRegIdx(i)); + } + ss << ", offset:" << VLENB / sizeof(ElemType); + return ss.str(); + } +}; + +class VxsatMicroInst : public VectorArithMicroInst +{ + private: + bool* vxsat; + public: + VxsatMicroInst(bool* Vxsat, ExtMachInst extMachInst) + : VectorArithMicroInst("vxsat_micro", extMachInst, + VectorIntegerArithOp, 0, 0) + { + vxsat = Vxsat; + } + Fault execute(ExecContext* xc, trace::InstRecord* traceData) + const override + { + xc->setMiscReg(MISCREG_VXSAT,*vxsat); + auto vcsr = xc->readMiscReg(MISCREG_VCSR); + xc->setMiscReg(MISCREG_VCSR, ((vcsr&~1)|*vxsat)); + return NoFault; + } + std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { + std::stringstream ss; + ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? "0x1" : "0x0"); + return ss.str(); + } +}; } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 0288f37ad8..2b46752ffe 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -2281,6 +2281,2060 @@ decode QUADRANT default Unknown::unknown() { } 0x15: decode FUNCT3 { + // OPIVV + 0x0: decode VFUNCT6 { + format VectorIntFormat { + 0x0: vadd_vv({{ + Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x2: vsub_vv({{ + Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x4: vminu_vv({{ + Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ? + Vs2_vu[i] : Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x5: vmin_vv({{ + Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ? + Vs2_vi[i] : Vs1_vi[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x6: vmaxu_vv({{ + Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ? + Vs2_vu[i] : Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x7: vmax_vv({{ + Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ? + Vs2_vi[i] : Vs1_vi[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x9: vand_vv({{ + Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0xa: vor_vv({{ + Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0xb: vxor_vv({{ + Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + } + 0x0c: VectorGatherFormat::vrgather_vv({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = Vs1_vu[i] + - vs2_elems * vs2_idx; + auto res = (Vs1_vu[i] >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + Vd_vu[i] = res; + } + } + }}, OPIVV, VectorMiscOp); + 0x0e: VectorGatherFormat::vrgatherei16_vv({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint16_t idx = Vs1_uh[i + vs1_bias] + - vs2_elems * vs2_idx; + auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i + vd_bias]; + Vd_vu[i + vd_bias] = res; + } + } + }}, OPIVV, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vvm({{ + Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i] + + elem_mask(v0, ei); + }}, OPIVV, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x12: decode VM { + 0x0: vsbc_vvm({{ + Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i] + - elem_mask(v0, ei); + }}, OPIVV, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vvm({{ + Vd_vu[i] = elem_mask(v0, ei) + ? Vs1_vu[i] + : Vs2_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + 0x1: decode VS2 { + 0x0: vmv_v_v({{ + Vd_vu[i] = Vs1_vu[i]; + }}, OPIVV, VectorIntegerArithOp); + } + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vv({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x21: vsadd_vv({{ + Vd_vu[i] = sat_add(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x22: vssubu_vv({{ + Vd_vu[i] = sat_subu(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x23: vssub_vv({{ + Vd_vu[i] = sat_sub(Vs2_vu[i], Vs1_vu[i], + vxsatptr); + }}, OPIVV, VectorIntegerArithOp); + 0x27: vsmul_vv({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + bool overflow = Vs1_vi[i] == Vs2_vi[i] && + Vs1_vi[i] == min; + __int128_t result = (__int128_t)Vs1_vi[i] * + (__int128_t)Vs2_vi[i]; + result = int_rounding<__int128_t>( + result, 0 /* TODO */, sew - 1); + result = result >> (sew - 1); + if (overflow) { + result = max; + *vxsatptr = true; + } + + Vd_vi[i] = (vi)result; + }}, OPIVV, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vv({{ + Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x28: vsrl_vv({{ + Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x29: vsra_vv({{ + Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1)); + }}, OPIVV, VectorIntegerArithOp); + 0x2a: vssrl_vv({{ + int sh = Vs1_vu[i] & (sew - 1); + __uint128_t val = Vs2_vu[i]; + + val = int_rounding<__uint128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vu[i] = val >> sh; + }}, OPIVV, VectorIntegerArithOp); + 0x2b: vssra_vv({{ + int sh = Vs1_vi[i] & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVV, VectorIntegerArithOp); + } + format VectorReduceIntWideningFormat { + 0x30: vwredsumu_vs({{ + Vd_vwu[0] = reduce_loop(std::plus(), + Vs1_vwu, Vs2_vu); + }}, OPIVV, VectorIntegerReduceOp); + 0x31: vwredsum_vs({{ + Vd_vwu[0] = reduce_loop(std::plus(), + Vs1_vwi, Vs2_vi); + }}, OPIVV, VectorIntegerReduceOp); + } + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vvm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vu[i], Vs1_vu[i], + elem_mask(v0, ei))); + }}, OPIVV, VectorIntegerArithOp); + 0x1: vmadc_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vu[i], Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + } + 0x13: decode VM { + 0x0: vmsbc_vvm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Vs1_vi[i], + elem_mask(v0, ei))); + }}, OPIVV, VectorIntegerArithOp); + 0x1: vmsbc_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + } + 0x18: vmseq_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] == Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x19: vmsne_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] != Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1a: vmsltu_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] < Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1b: vmslt_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] < Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1c: vmsleu_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= Vs1_vu[i])); + }}, OPIVV, VectorIntegerArithOp); + 0x1d: vmsle_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= Vs1_vi[i])); + }}, OPIVV, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wv({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1))); + }}, OPIVV, VectorIntegerArithOp); + 0x2d: vnsra_wv({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1))); + }}, OPIVV, VectorIntegerArithOp); + 0x2e: vnclipu_wv({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + res = max; + // TODO: vxsat + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVV, VectorIntegerArithOp); + 0x2f: vnclip_wv({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVV, VectorIntegerArithOp); + } + } + // OPFVV + 0x1: decode VFUNCT6 { + 0x00: VectorFloatFormat::vfadd_vv({{ + auto fd = fadd(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x01: VectorReduceFloatFormat::vfredusum_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fadd(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x02: VectorFloatFormat::vfsub_vv({{ + auto fd = fsub(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x03: VectorReduceFloatFormat::vfredosum_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fadd(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x04: VectorFloatFormat::vfmin_vv({{ + auto fd = fmin(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x05: VectorReduceFloatFormat::vfredmin_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fmin(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x06: VectorFloatFormat::vfmax_vv({{ + auto fd = fmax(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x07: VectorReduceFloatFormat::vfredmax_vs({{ + Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) { + return fmax(ftype(src1), ftype(src2)); + }, Vs1_vu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x08: VectorFloatFormat::vfsgnj_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + false, false).v; + }}, OPFVV, VectorFloatArithOp); + 0x09: VectorFloatFormat::vfsgnjn_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + true, false).v; + }}, OPFVV, VectorFloatArithOp); + 0x0a: VectorFloatFormat::vfsgnjx_vv({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]), + false, true).v; + }}, OPFVV, VectorFloatArithOp); + // VWFUNARY0 + 0x10: decode VS1 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vfmv.f.s are reserved + 0x1: VectorNonSplitFormat::vfmv_f_s({{ + freg_t fd = freg(Vs2_vu[0]); + Fd_bits = fd.v; + }}, OPFVV, VectorMiscOp); + } + } + 0x12: decode VS1 { + format VectorFloatCvtFormat { + 0x00: vfcvt_xu_f_v({{ + Vd_vu[i] = f_to_ui(ftype(Vs2_vu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x01: vfcvt_x_f_v({{ + Vd_vu[i] = f_to_i(ftype(Vs2_vu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x02: vfcvt_f_xu_v({{ + auto fd = ui_to_f(Vs2_vu[i]); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x03: vfcvt_f_x_v({{ + auto fd = i_to_f(Vs2_vu[i]); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x06: vfcvt_rtz_xu_f_v({{ + Vd_vu[i] = f_to_ui(ftype(Vs2_vu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x07: vfcvt_rtz_x_f_v({{ + Vd_vu[i] = f_to_i(ftype(Vs2_vu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + format VectorFloatWideningCvtFormat { + 0x08: vfwcvt_xu_f_v({{ + Vd_vwu[i] = f_to_wui( + ftype(Vs2_vu[i + offset]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x09: vfwcvt_x_f_v({{ + Vd_vwu[i] = f_to_wi( + ftype(Vs2_vu[i + offset]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x0a: vfwcvt_f_xu_v({{ + auto fd = ui_to_wf(Vs2_vu[i + offset]); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0b: vfwcvt_f_x_v({{ + auto fd = i_to_wf(Vs2_vu[i + offset]); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0c: vfwcvt_f_f_v({{ + auto fd = f_to_wf( + ftype(Vs2_vu[i + offset])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x0e: vfwcvt_rtz_xu_f_v({{ + Vd_vwu[i] = f_to_wui( + ftype(Vs2_vu[i + offset]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x0f: vfwcvt_rtz_x_f_v({{ + Vd_vwu[i] = f_to_wi( + ftype(Vs2_vu[i + offset]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + format VectorFloatNarrowingCvtFormat { + 0x10: vfncvt_xu_f_w({{ + Vd_vu[i + offset] = f_to_nui( + ftype(Vs2_vwu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x11: vfncvt_x_f_w({{ + Vd_vu[i + offset] = f_to_ni( + ftype(Vs2_vwu[i]), + softfloat_roundingMode); + }}, OPFVV, VectorFloatConvertOp); + 0x12: vfncvt_f_xu_w({{ + auto fd = ui_to_nf(Vs2_vwu[i]); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x13: vfncvt_f_x_w({{ + auto fd = i_to_nf(Vs2_vwu[i]); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x14: vfncvt_f_f_w({{ + auto fd = f_to_nf(ftype(Vs2_vwu[i])); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x15: vfncvt_rod_f_f_w({{ + softfloat_roundingMode = softfloat_round_odd; + auto fd = f_to_nf(ftype(Vs2_vwu[i])); + Vd_vu[i + offset] = fd.v; + }}, OPFVV, VectorFloatConvertOp); + 0x16: vfncvt_rtz_xu_f_w({{ + Vd_vu[i + offset] = f_to_nui( + ftype(Vs2_vwu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + 0x17: vfncvt_rtz_x_f_w({{ + Vd_vu[i + offset] = f_to_ni( + ftype(Vs2_vwu[i]), + softfloat_round_minMag); + }}, OPFVV, VectorFloatConvertOp); + } + } + 0x13: decode VS1 { + format VectorFloatCvtFormat { + 0x00: vfsqrt_v({{ + auto fd = fsqrt(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x04: vfrsqrt7_v({{ + auto fd = frsqrte7(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x05: vfrec7_v({{ + auto fd = frecip7(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x10: vfclass_v({{ + auto fd = fclassify(ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + } + } + + format VectorFloatMaskFormat { + 0x18: vmfeq_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + feq(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x19: vmfle_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x1b: vmflt_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + 0x1c: vmfne_vv({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + !feq(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i]))); + }}, OPFVV, VectorFloatArithOp); + } + format VectorFloatFormat { + 0x20: vfdiv_vv({{ + auto fd = fdiv(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x24: vfmul_vv({{ + auto fd = fmul(ftype(Vs2_vu[i]), + ftype(Vs1_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x28: vfmadd_vv({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype(Vs1_vu[i]), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x29: vfnmadd_vv({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype(Vs1_vu[i]), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2a: vfmsub_vv({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype(Vs1_vu[i]), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2b: vfnmsub_vv({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype(Vs1_vu[i]), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2c: vfmacc_vv({{ + auto fd = fmadd(ftype(Vs1_vu[i]), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2d: vfnmacc_vv({{ + auto fd = fmadd(fneg(ftype(Vs1_vu[i])), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2e: vfmsac_vv({{ + auto fd = fmadd(ftype(Vs1_vu[i]), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x2f: vfnmsac_vv({{ + auto fd = fmadd(fneg(ftype(Vs1_vu[i])), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{ + Vd_vwu[0] = reduce_loop( + [](const vwu& src1, const vu& src2) { + return fadd( + ftype(src1), + f_to_wf(ftype(src2)) + ); + }, Vs1_vwu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + 0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{ + Vd_vwu[0] = reduce_loop( + [](const vwu& src1, const vu& src2) { + return fadd( + ftype(src1), + f_to_wf(ftype(src2)) + ); + }, Vs1_vwu, Vs2_vu); + }}, OPFVV, VectorFloatReduceOp); + } + format VectorFloatWideningFormat { + 0x30: vfwadd_vv({{ + auto fd = fadd( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x32: vfwsub_vv({{ + auto fd = fsub( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x34: vfwadd_wv({{ + auto fd = fadd( + ftype(Vs2_vwu[i]), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x36: vfwsub_wv({{ + auto fd = fsub( + ftype(Vs2_vwu[i]), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x38: vfwmul_vv({{ + auto fd = fmul( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype(Vs1_vu[i + offset]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3c: vfwmacc_vv({{ + auto fd = fmadd( + fwiden(ftype(Vs1_vu[i + offset])), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3d: vfwnmacc_vv({{ + auto fd = fmadd( + fwiden(fneg(ftype(Vs1_vu[i + offset]))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3e: vfwmsac_vv({{ + auto fd = fmadd( + fwiden(ftype(Vs1_vu[i + offset])), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + 0x3f: vfwnmsac_vv({{ + auto fd = fmadd( + fwiden(fneg(ftype(Vs1_vu[i + offset]))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVV, VectorFloatArithOp); + } + } + // OPMVV + 0x2: decode VFUNCT6 { + format VectorReduceIntFormat { + 0x0: vredsum_vs({{ + Vd_vi[0] = + reduce_loop(std::plus(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x1: vredand_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_and(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x2: vredor_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_or(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x3: vredxor_vs({{ + Vd_vi[0] = + reduce_loop(std::bit_xor(), Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x4: vredminu_vs({{ + Vd_vu[0] = + reduce_loop([](const vu& src1, const vu& src2) { + return std::min(src1, src2); + }, Vs1_vu, Vs2_vu); + }}, OPMVV, VectorIntegerReduceOp); + 0x5: vredmin_vs({{ + Vd_vi[0] = + reduce_loop([](const vi& src1, const vi& src2) { + return std::min(src1, src2); + }, Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + 0x6: vredmaxu_vs({{ + Vd_vu[0] = + reduce_loop([](const vu& src1, const vu& src2) { + return std::max(src1, src2); + }, Vs1_vu, Vs2_vu); + }}, OPMVV, VectorIntegerReduceOp); + 0x7: vredmax_vs({{ + Vd_vi[0] = + reduce_loop([](const vi& src1, const vi& src2) { + return std::max(src1, src2); + }, Vs1_vi, Vs2_vi); + }}, OPMVV, VectorIntegerReduceOp); + } + format VectorIntFormat { + 0x8: vaaddu_vv({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0x9: vaadd_vv({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0xa: vasubu_vv({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + 0xb: vasub_vv({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i]; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVV, VectorIntegerArithOp); + } + // VWXUNARY0 + 0x10: decode VS1 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vmv.x.s are reserved. + 0x1: VectorNonSplitFormat::vmv_x_s({{ + Rd_ud = Vs2_vi[0]; + }}, OPMVV, VectorMiscOp); + } + 0x10: Vector1Vs1RdMaskFormat::vcpop_m({{ + uint64_t popcount = 0; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + if(this->vm){ + popcount += vs2_lsb; + }else{ + bool do_mask = elem_mask(v0, i); + popcount += (vs2_lsb && do_mask); + } + } + Rd_vu = popcount; + }}, OPMVV, VectorMiscOp); + 0x11: Vector1Vs1RdMaskFormat::vfirst_m({{ + int64_t pos = -1; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + if(this->vm == 0){ + if(elem_mask(v0, i)==0){ + continue; + } + } + bool vs2_lsb = elem_mask(Vs2_vu, i); + if (vs2_lsb) { + pos = i; + break; + } + } + Rd_vu = pos; + }}, OPMVV, VectorMiscOp); + } + 0x12: decode VS1 { + format VectorIntExtFormat { + 0x02: vzext_vf8({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x03: vsext_vf8({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x04: vzext_vf4({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x05: vsext_vf4({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x06: vzext_vf2({{ + Vd_vu[i] = Vs2_vextu[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + 0x07: vsext_vf2({{ + Vd_vi[i] = Vs2_vext[i + offset]; + }}, OPMVV, VectorIntegerExtensionOp); + } + } + 0x14: decode VS1 { + 0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x02: Vector1Vs1VdMaskFormat::vmsof_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x03: Vector1Vs1VdMaskFormat::vmsif_m({{ + bool has_one = false; + for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) { + bool vs2_lsb = elem_mask(Vs2_vu, i); + bool do_mask = elem_mask(v0, i); + if(this->vm||(this->vm == 0&&do_mask)){ + uint64_t res = 0; + if (!has_one && !vs2_lsb) { + res = 1; + } else if(!has_one && vs2_lsb) { + has_one = true; + res = 1; + } + Vd_ub[i/8] = ASSIGN_VD_BIT(i, res); + } + } + }}, OPMVV, VectorMiscOp); + 0x10: ViotaFormat::viota_m({{ + RiscvISAInst::VecRegContainer tmp_s2; + xc->getRegOperand(this, 2, + &tmp_s2); + auto Vs2bit = tmp_s2.as(); + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + + vtype_VLMAX(vtype, true) * this->microIdx; + bool vs2_lsb = elem_mask(Vs2bit, ei); + bool do_mask = elem_mask(v0, ei); + bool has_one = false; + if (this->vm || (do_mask && !this->vm)) { + if (vs2_lsb) { + has_one = true; + } + } + bool use_ori = (!this->vm) && !do_mask; + if(use_ori == false){ + Vd_vu[i] = *cnt; + } + if (has_one) { + *cnt = *cnt+1; + } + } + }}, OPMVV, VectorMiscOp); + 0x11: VectorIntFormat::vid_v({{ + Vd_vu[i] = ei; + }}, OPMVV, VectorMiscOp); + } + format VectorMaskFormat { + 0x18: vmandn_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x19: vmand_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1a: vmor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1b: vmxor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1c: vmorn_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i)); + }}, OPMVV, VectorMiscOp); + 0x1d: vmnand_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + 0x1e: vmnor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + 0x1f: vmxnor_mm({{ + Vd_ub[i/8] = ASSIGN_VD_BIT(i, + !(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i))); + }}, OPMVV, VectorMiscOp); + } + format VectorIntFormat { + 0x20: vdivu_vv({{ + if (Vs1_vu[i] == 0) + Vd_vu[i] = (vu)-1; + else + Vd_vu[i] = Vs2_vu[i] / Vs1_vu[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x21: vdiv_vv({{ + if (Vs1_vi[i] == 0) + Vd_vi[i] = -1; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Vs1_vi[i] == -1) + Vd_vi[i] = Vs2_vi[i]; + else + Vd_vi[i] = Vs2_vi[i] / Vs1_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x22: vremu_vv({{ + if (Vs1_vu[i] == 0) { + Vd_vu[i] = Vs2_vu[i]; + } else { + Vd_vu[i] = Vs2_vu[i] % Vs1_vu[i]; + } + }}, OPMVV, VectorIntegerArithOp); + 0x23: vrem_vv({{ + if (Vs1_vi[i] == 0) { + Vd_vi[i] = Vs2_vi[i]; + } else if (Vs2_vi[i] == std::numeric_limits::min() + && Vs1_vi[i] == -1) { + Vd_vi[i] = 0; + } else { + Vd_vi[i] = Vs2_vi[i] % Vs1_vi[i]; + } + }}, OPMVV, VectorIntegerArithOp); + 0x24: vmulhu_vv({{ + if (sew < 64) { + Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Vs1_vu[i]) + >> sew; + } else { + Vd_vu[i] = mulhu_64(Vs2_vu[i], Vs1_vu[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x25: vmul_vv({{ + Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x26: vmulhsu_vv({{ + if (sew < 64) { + Vd_vi[i] = ((int64_t)Vs2_vi[i] * + (uint64_t)Vs1_vu[i]) + >> sew; + } else { + Vd_vi[i] = mulhsu_64(Vs2_vi[i], Vs1_vu[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x27: vmulh_vv({{ + if (sew < 64) { + Vd_vi[i] = ((int64_t)Vs2_vi[i] * Vs1_vi[i]) + >> sew; + } else { + Vd_vi[i] = mulh_64(Vs2_vi[i], Vs1_vi[i]); + } + }}, OPMVV, VectorIntegerArithOp); + 0x29: vmadd_vv({{ + Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2b: vnmsub_vv({{ + Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2d: vmacc_vv({{ + Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x2f: vnmsac_vv({{ + Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i]; + }}, OPMVV, VectorIntegerArithOp); + } + format VectorIntWideningFormat { + 0x30: vwaddu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + + vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x31: vwadd_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + + vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x32: vwsubu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + - vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x33: vwsub_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + - vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x34: vwaddu_wv({{ + Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x35: vwadd_wv({{ + Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x36: vwsubu_wv({{ + Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x37: vwsub_wv({{ + Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x38: vwmulu_vv({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + * vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3a: vwmulsu_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + * vwu(Vs1_vu[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3b: vwmul_vv({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + * vwi(Vs1_vi[i + offset]); + }}, OPMVV, VectorIntegerArithOp); + 0x3c: vwmaccu_vv({{ + Vd_vwu[i] = vwu(Vs1_vu[i + offset]) + * vwu(Vs2_vu[i + offset]) + + Vs3_vwu[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x3d: vwmacc_vv({{ + Vd_vwi[i] = vwi(Vs1_vi[i + offset]) + * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVV, VectorIntegerArithOp); + 0x3f: vwmaccsu_vv({{ + Vd_vwi[i] = vwi(Vs1_vi[i + offset]) + * vwu(Vs2_vu[i + offset]) + + Vs3_vwi[i]; + }}, OPMVV, VectorIntegerArithOp); + } + } + // OPIVI + 0x3: decode VFUNCT6 { + format VectorIntFormat { + 0x00: vadd_vi({{ + Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x03: vrsub_vi({{ + Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i]; + }}, OPIVI, VectorIntegerArithOp); + 0x09: vand_vi({{ + Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x0a: vor_vi({{ + Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + 0x0b: vxor_vi({{ + Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + } + 0x0c: VectorGatherFormat::vrgather_vi({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = + (uint64_t)sext<5>(SIMM5) - vs2_elems * vs2_idx; + Vd_vu[i] = ((uint64_t)sext<5>(SIMM5) >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + } + } + }}, OPIVI, VectorMiscOp); + 0x0e: VectorSlideUpFormat::vslideup_vi({{ + const int offset = (int)(uint64_t)(SIMM5); + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + } + }}, OPIVI, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslidedown_vi({{ + const int offset = (int)(uint64_t)(SIMM5); + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = res[i]; + } + } + } + }}, OPIVI, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vim({{ + Vd_vi[i] = Vs2_vi[i] + + (vi)sext<5>(SIMM5) + elem_mask(v0, ei); + }}, OPIVI, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vim({{ + Vd_vi[i] = elem_mask(v0, ei) + ? (vi)sext<5>(SIMM5) + : Vs2_vi[i]; + }}, OPIVI, VectorIntegerArithOp); + 0x1: vmv_v_i({{ + Vd_vi[i] = (vi)sext<5>(SIMM5); + }}, OPIVI, VectorIntegerArithOp); + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vi({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], (vu)SIMM5, + vxsatptr); + }}, OPIVI, VectorIntegerArithOp); + 0x21: vsadd_vi({{ + Vd_vu[i] = sat_add(Vs2_vu[i], (vu)SIMM5, + vxsatptr); + }}, OPIVI, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vi({{ + Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x28: vsrl_vi({{ + Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x2a: vssrl_vi({{ + int sh = SIMM5 & (vtype_SEW(vtype) - 1); + __uint128_t res = Vs2_vu[i]; + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, sh) >> sh; + + Vd_vu[i] = res; + }}, OPIVI, VectorIntegerArithOp); + 0x29: vsra_vi({{ + Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f); + }}, OPIVI, VectorIntegerArithOp); + 0x2b: vssra_vi({{ + int sh = SIMM5 & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVI, VectorIntegerArithOp); + } + // According to Spec Section 16.6, + // vm must be 1 (unmasked) in vmvr.v instructions. + 0x27: decode VM { 0x1: decode SIMM3 { + format VMvWholeFormat { + 0x0: vmv1r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x1: vmv2r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x3: vmv4r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + 0x7: vmv8r_v({{ + Vd_ud[i] = Vs2_ud[i]; + }}, OPIVI, VectorMiscOp); + } + }} + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vim({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5), + elem_mask(v0, ei))); + }}, OPIVI, VectorIntegerArithOp); + 0x1: vmadc_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + } + 0x18: vmseq_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] == (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x19: vmsne_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] != (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1c: vmsleu_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= (vu)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1d: vmsle_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1e: vmsgtu_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] > (vu)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + 0x1f: vmsgt_vi({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] > (vi)sext<5>(SIMM5))); + }}, OPIVI, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wi({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)SIMM5 & (sew * 2 - 1))); + }}, OPIVI, VectorIntegerArithOp); + 0x2d: vnsra_wi({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)SIMM5 & (sew * 2 - 1))); + }}, OPIVI, VectorIntegerArithOp); + 0x2e: vnclipu_wi({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = VS1 & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + // TODO: vxsat + res = max; + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVI, VectorIntegerArithOp); + 0x2f: vnclip_wi({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = VS1 & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVI, VectorIntegerArithOp); + } + } + // OPIVX + 0x4: decode VFUNCT6 { + format VectorIntFormat { + 0x0: vadd_vx({{ + Vd_vu[i] = Vs2_vu[i] + Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0x2: vsub_vx({{ + Vd_vu[i] = Vs2_vu[i] - Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0x3: vrsub_vx({{ + Vd_vu[i] = Rs1_vu - Vs2_vu[i]; + }}, OPIVX, VectorIntegerArithOp); + 0x4: vminu_vx({{ + Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu); + }}, OPIVX, VectorIntegerArithOp); + 0x5: vmin_vx({{ + Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi); + }}, OPIVX, VectorIntegerArithOp); + 0x6: vmaxu_vx({{ + Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu); + }}, OPIVX, VectorIntegerArithOp); + 0x7: vmax_vx({{ + Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi); + }}, OPIVX, VectorIntegerArithOp); + 0x9: vand_vx({{ + Vd_vu[i] = Vs2_vu[i] & Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0xa: vor_vx({{ + Vd_vu[i] = Vs2_vu[i] | Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + 0xb: vxor_vx({{ + Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + } + 0x0e: VectorSlideUpFormat::vslideup_vx({{ + const int offset = (int)Rs1_vu; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + } + }}, OPIVX, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslidedown_vx({{ + const int offset = (int)Rs1_vu; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = res[i]; + } + } + } + }}, OPIVX, VectorMiscOp); + 0x0c: VectorGatherFormat::vrgather_vx({{ + for (uint32_t i = 0; i < microVl; i++) { + uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + if (this->vm || elem_mask(v0, ei)) { + const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx; + Vd_vu[i] = (Rs1_vu >= vlmax) ? 0 + : (idx < vs2_elems) ? Vs2_vu[idx] + : Vs3_vu[i]; + } + } + }}, OPIVX, VectorMiscOp); + format VectorIntFormat { + 0x10: decode VM { + 0x0: vadc_vxm({{ + Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei); + }}, OPIVX, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x12: decode VM { + 0x0: vsbc_vxm({{ + Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei); + }}, OPIVX, VectorIntegerArithOp); + // the unmasked versions (vm=1) are reserved + } + 0x17: decode VM { + 0x0: vmerge_vxm({{ + Vd_vu[i] = elem_mask(v0, ei) ? Rs1_vu : Vs2_vu[i]; + }}, OPIVX, VectorIntegerArithOp); + 0x1: decode VS2 { + 0x0: vmv_v_x({{ + Vd_vu[i] = Rs1_vu; + }}, OPIVX, VectorIntegerArithOp); + } + } + } + format VectorIntVxsatFormat{ + 0x20: vsaddu_vx({{ + Vd_vu[i] = sat_addu(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x21: vsadd_vx({{ + Vd_vu[i] = sat_add(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x22: vssubu_vx({{ + Vd_vu[i] = sat_subu(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x23: vssub_vx({{ + Vd_vu[i] = sat_sub(Vs2_vu[i], Rs1_vu, + vxsatptr); + }}, OPIVX, VectorIntegerArithOp); + 0x27: vsmul_vx({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min; + __int128_t result = + (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i]; + result = int_rounding<__uint128_t>( + result, 0 /* TODO */, sew - 1); + result = result >> (sew - 1); + if (overflow) { + result = max; + *vxsatptr = true; + } + + Vd_vi[i] = (vi)result; + }}, OPIVX, VectorIntegerArithOp); + } + format VectorIntFormat { + 0x25: vsll_vx({{ + Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x28: vsrl_vx({{ + Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x29: vsra_vx({{ + Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1)); + }}, OPIVX, VectorIntegerArithOp); + 0x2a: vssrl_vx({{ + int sh = Rs1_vu & (sew - 1); + __uint128_t val = Vs2_vu[i]; + + val = int_rounding<__uint128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vu[i] = val >> sh; + }}, OPIVX, VectorIntegerArithOp); + 0x2b: vssra_vx({{ + int sh = Rs1_vu & (sew - 1); + __int128_t val = Vs2_vi[i]; + + val = int_rounding<__int128_t>(val, + xc->readMiscReg(MISCREG_VXRM), sh); + Vd_vi[i] = val >> sh; + }}, OPIVX, VectorIntegerArithOp); + } + format VectorIntNarrowingFormat { + 0x2c: vnsrl_wx({{ + Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >> + ((vwu)Rs1_vu & (sew * 2 - 1))); + }}, OPIVX, VectorIntegerArithOp); + 0x2d: vnsra_wx({{ + Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >> + ((vwu)Rs1_vu & (sew * 2 - 1))); + }}, OPIVX, VectorIntegerArithOp); + 0x2e: vnclipu_wx({{ + vu max = std::numeric_limits::max(); + uint64_t sign_mask = + std::numeric_limits::max() << sew; + __uint128_t res = Vs2_vwu[i]; + unsigned shift = Rs1_vu & ((sew * 2) - 1); + + res = int_rounding<__uint128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res & sign_mask) { + // TODO: vxsat + res = max; + } + + Vd_vu[i + offset] = (vu)res; + }}, OPIVX, VectorIntegerArithOp); + 0x2f: vnclip_wx({{ + vi max = std::numeric_limits::max(); + vi min = std::numeric_limits::min(); + __int128_t res = Vs2_vwi[i]; + unsigned shift = Rs1_vi & ((sew * 2) - 1); + + res = int_rounding<__int128_t>( + res, 0 /* TODO */, shift) >> shift; + + if (res < min) { + res = min; + // TODO: vxsat + } else if (res > max) { + res = max; + // TODO: vxsat + } + + Vd_vi[i + offset] = (vi)res; + }}, OPIVX, VectorIntegerArithOp); + } + + format VectorIntMaskFormat { + 0x11: decode VM { + 0x0: vmadc_vxm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], Rs1_vi, + elem_mask(v0, ei))); + }}, OPIVX, VectorIntegerArithOp); + 0x1: vmadc_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + carry_out(Vs2_vi[i], Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + 0x13: decode VM { + 0x0: vmsbc_vxm({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Rs1_vi, + elem_mask(v0, ei))); + }}, OPIVX, VectorIntegerArithOp); + 0x1: vmsbc_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + borrow_out(Vs2_vi[i], Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + 0x18: vmseq_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] == Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x19: vmsne_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] != Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1a: vmsltu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] < Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1b: vmslt_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] < Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + 0x1c: vmsleu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] <= Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1d: vmsle_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] <= Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + 0x1e: vmsgtu_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vu[i] > Rs1_vu)); + }}, OPIVX, VectorIntegerArithOp); + 0x1f: vmsgt_vx({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + (Vs2_vi[i] > Rs1_vi)); + }}, OPIVX, VectorIntegerArithOp); + } + } + // OPFVF + 0x5: decode VFUNCT6 { + format VectorFloatFormat{ + 0x00: vfadd_vf({{ + auto fd = fadd(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x02: vfsub_vf({{ + auto fd = fsub(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x04: vfmin_vf({{ + auto fd = fmin(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x06: vfmax_vf({{ + auto fd = fmax(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x08: vfsgnj_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + false, false).v; + }}, OPFVF, VectorFloatArithOp); + 0x09: vfsgnjn_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + true, false).v; + }}, OPFVF, VectorFloatArithOp); + 0x0a: vfsgnjx_vf({{ + Vd_vu[i] = fsgnj(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)), + false, true).v; + }}, OPFVF, VectorFloatArithOp); + } + 0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + // TODO: dirty code + if (vdIdx == 0 && vs2Idx == 0 && + (this->vm || elem_mask(v0, 0))) { + tmp_d0.as()[0] = Rs1_vu; + } + } + }}, OPFVF, VectorMiscOp); + 0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1) + ? res[i] + : Rs1_vu; + } + } + } + }}, OPFVF, VectorMiscOp); + // VRFUNARY0 + 0x10: decode VS2 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vfmv.s.f are reserved + 0x1: VectorNonSplitFormat::vfmv_s_f({{ + auto fd = ftype_freg(freg(Fs1_bits)); + Vd_vu[0] = fd.v; + }}, OPFVV, VectorMiscOp); + } + } + format VectorFloatFormat{ + 0x17: decode VM { + 0x0: vfmerge_vfm({{ + Vd_vu[i] = elem_mask(v0, ei) + ? ftype_freg(freg(Fs1_bits)).v + : Vs2_vu[i]; + }}, OPFVF, VectorFloatArithOp); + 0x1: vfmv_v_f({{ + auto fd = ftype_freg(freg(Fs1_bits)); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + } + format VectorFloatMaskFormat { + 0x18: vmfeq_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + feq(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x19: vmfle_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1b: vmflt_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1c: vmfne_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + !feq(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits)))); + }}, OPFVF, VectorFloatArithOp); + 0x1d: vmfgt_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + flt(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]))); + }}, OPFVF, VectorFloatArithOp); + 0x1f: vmfge_vf({{ + Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset, + fle(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]))); + }}, OPFVF, VectorFloatArithOp); + } + format VectorFloatFormat{ + 0x20: vfdiv_vf({{ + auto fd = fdiv(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x21: vfrdiv_vf({{ + auto fd = fdiv(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x24: vfmul_vf({{ + auto fd = fmul(ftype(Vs2_vu[i]), + ftype_freg(freg(Fs1_bits))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x27: vfrsub_vf({{ + auto fd = fsub(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x28: vfmadd_vf({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x29: vfnmadd_vf({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype_freg(freg(Fs1_bits)), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2a: vfmsub_vf({{ + auto fd = fmadd(ftype(Vs3_vu[i]), + ftype_freg(freg(Fs1_bits)), + fneg(ftype(Vs2_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2b: vfnmsub_vf({{ + auto fd = fmadd(fneg(ftype(Vs3_vu[i])), + ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2c: vfmacc_vf({{ + auto fd = fmadd(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i])); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2d: vfnmacc_vf({{ + auto fd = fmadd( + fneg(ftype_freg(freg(Fs1_bits))), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i])) + ); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2e: vfmsac_vf({{ + auto fd = fmadd(ftype_freg(freg(Fs1_bits)), + ftype(Vs2_vu[i]), + fneg(ftype(Vs3_vu[i]))); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x2f: vfnmsac_vf({{ + auto fd = fmadd( + fneg(ftype_freg(freg(Fs1_bits))), + ftype(Vs2_vu[i]), + ftype(Vs3_vu[i]) + ); + Vd_vu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + format VectorFloatWideningFormat { + 0x30: vfwadd_vf({{ + auto fd = fadd( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x32: vfwsub_vf({{ + auto fd = fsub( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x34: vfwadd_wf({{ + auto fd = fadd( + ftype(Vs2_vwu[i]), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x36: vfwsub_wf({{ + auto fd = fsub( + ftype(Vs2_vwu[i]), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x38: vfwmul_vf({{ + auto fd = fmul( + fwiden(ftype(Vs2_vu[i + offset])), + fwiden(ftype_freg(freg(Fs1_bits)))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3c: vfwmacc_vf({{ + auto fd = fmadd( + fwiden(ftype_freg(freg(Fs1_bits))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3d: vfwnmacc_vf({{ + auto fd = fmadd( + fwiden(fneg(ftype_freg(freg(Fs1_bits)))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3e: vfwmsac_vf({{ + auto fd = fmadd( + fwiden(ftype_freg(freg(Fs1_bits))), + fwiden(ftype(Vs2_vu[i + offset])), + fneg(ftype(Vs3_vwu[i]))); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + 0x3f: vfwnmsac_vf({{ + auto fd = fmadd( + fwiden(fneg(ftype_freg(freg(Fs1_bits)))), + fwiden(ftype(Vs2_vu[i + offset])), + ftype(Vs3_vwu[i])); + Vd_vwu[i] = fd.v; + }}, OPFVF, VectorFloatArithOp); + } + } + // OPMVX + 0x6: decode VFUNCT6 { + format VectorIntFormat { + 0x08: vaaddu_vx({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x09: vaadd_vx({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + } + 0x0e: VectorSlideUpFormat::vslide1up_vx({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vdIdx - vs2Idx; + const int offsetInVreg = offset - vregOffset * microVlmax; + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int vs2Offset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int elemOffset = vdOffset + vdIdx * microVlmax; + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + if (this->vm || elem_mask(v0, i + elemOffset)) { + Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + } + // TODO: dirty code + if (vdIdx == 0 && vs2Idx == 0 && + (this->vm || elem_mask(v0, 0))) { + tmp_d0.as()[0] = Rs1_vu; + } + } + }}, OPIVX, VectorMiscOp); + 0x0f: VectorSlideDownFormat::vslide1down_vx({{ + const int offset = 1; + const int microVlmax = vtype_VLMAX(machInst.vtype8, true); + const int vregOffset = vs2Idx - vdIdx; + const int offsetInVreg = offset - vregOffset * microVlmax; + const int numVs2s = vtype_regs_per_group(vtype); + if (std::abs(offsetInVreg) < uint32_t(microVlmax)) { + const bool needZeroTail = numVs2s == vs2Idx + 1; + const int upperBound = (offsetInVreg >= 0) + ? microVlmax - offsetInVreg + : microVlmax + offsetInVreg; + const int vdOffset = (offsetInVreg >= 0) + ? 0 + : -offsetInVreg; + const int vs2Offset = (offsetInVreg >= 0) + ? offsetInVreg + : 0; + const int elemIdxBase = vdIdx * microVlmax; + vreg_t resVreg; + auto res = resVreg.as(); + for (int i = 0; + i < upperBound && i + vdOffset < microVl; + i++) { + res[i + vdOffset] = Vs2_vu[i + vs2Offset]; + } + if (needZeroTail) { + for (int i = upperBound + vdOffset; + i < microVlmax; i++) { + res[i] = 0; + } + } + for (int i = vdOffset; i < microVl ; i++) { + if (vm || elem_mask(v0, i + elemIdxBase)) { + Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1) + ? res[i] + : Rs1_vu; + } + } + } + }}, OPIVX, VectorMiscOp); + // VRXUNARY0 + 0x10: decode VS2 { + 0x00: decode VM { + // The encodings corresponding to the masked versions + // (vm=0) of vmv.s.x are reserved. + 0x1: VectorNonSplitFormat::vmv_s_x({{ + Vd_vu[0] = Rs1_vu; + }}, OPMVX, VectorMiscOp); + } + } + format VectorIntFormat { + 0x0a: vasubu_vx({{ + __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vu[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x0b: vasub_vx({{ + __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi; + res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1); + Vd_vi[i] = res >> 1; + }}, OPMVX, VectorIntegerArithOp); + 0x20: vdivu_vx({{ + if (Rs1_vu == 0) + Vd_vu[i] = (vu)-1; + else + Vd_vu[i] = Vs2_vu[i] / Rs1_vu; + }}, OPMVX, VectorIntegerArithOp); + 0x21: vdiv_vx({{ + if (Rs1_vi == 0) + Vd_vi[i] = -1; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Rs1_vi == -1) + Vd_vi[i] = Vs2_vi[i]; + else + Vd_vi[i] = Vs2_vi[i] / Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x22: vremu_vx({{ + if (Rs1_vu == 0) + Vd_vu[i] = Vs2_vu[i]; + else + Vd_vu[i] = Vs2_vu[i] % Rs1_vu; + }}, OPMVX, VectorIntegerArithOp); + 0x23: vrem_vx({{ + if (Rs1_vi == 0) + Vd_vi[i] = Vs2_vi[i]; + else if (Vs2_vi[i] == std::numeric_limits::min() + && Rs1_vi == -1) + Vd_vi[i] = 0; + else + Vd_vi[i] = Vs2_vi[i] % Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x24: vmulhu_vx({{ + if (sew < 64) + Vd_vu[i] = ((uint64_t)Vs2_vu[i] * Rs1_vu) + >> sew; + else + Vd_vu[i] = mulhu_64(Vs2_vu[i], Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x25: vmul_vx({{ + Vd_vi[i] = Vs2_vi[i] * Rs1_vi; + }}, OPMVX, VectorIntegerArithOp); + 0x26: vmulhsu_vx({{ + if (sew < 64) + Vd_vi[i] = ((int64_t)Vs2_vi[i] * + (uint64_t)Rs1_vu) + >> sew; + else + Vd_vi[i] = mulhsu_64(Vs2_vi[i], Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x27: vmulh_vx({{ + if (sew < 64) + Vd_vi[i] = ((int64_t)Vs2_vi[i] * Rs1_vi) + >> sew; + else + Vd_vi[i] = mulh_64(Vs2_vi[i], Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x29: vmadd_vx({{ + Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2b: vnmsub_vx({{ + Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2d: vmacc_vx({{ + Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x2f: vnmsac_vx({{ + Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i]; + }}, OPMVX, VectorIntegerArithOp); + } + format VectorIntWideningFormat { + 0x30: vwaddu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x31: vwadd_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x32: vwsubu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x33: vwsub_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x34: vwaddu_wx({{ + Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x35: vwadd_wx({{ + Vd_vwi[i] = Vs2_vwi[i] + vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x36: vwsubu_wx({{ + Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x37: vwsub_wx({{ + Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x38: vwmulu_vx({{ + Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x3a: vwmulsu_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwu(Rs1_vu); + }}, OPMVX, VectorIntegerArithOp); + 0x3b: vwmul_vx({{ + Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi); + }}, OPMVX, VectorIntegerArithOp); + 0x3c: vwmaccu_vx({{ + Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset]) + + Vs3_vwu[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3d: vwmacc_vx({{ + Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3e: vwmaccus_vx({{ + Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + 0x3f: vwmaccsu_vx({{ + Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset]) + + Vs3_vwi[i]; + }}, OPMVX, VectorIntegerArithOp); + } + } 0x7: decode BIT31 { format VConfOp { 0x0: vsetvli({{ diff --git a/src/arch/riscv/isa/formats/formats.isa b/src/arch/riscv/isa/formats/formats.isa index 4bdc3021d5..0102df17d7 100644 --- a/src/arch/riscv/isa/formats/formats.isa +++ b/src/arch/riscv/isa/formats/formats.isa @@ -38,6 +38,7 @@ ##include "amo.isa" ##include "bs.isa" ##include "vector_conf.isa" +##include "vector_arith.isa" ##include "vector_mem.isa" // Include formats for nonstandard extensions diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa new file mode 100644 index 0000000000..c462e6c8d4 --- /dev/null +++ b/src/arch/riscv/isa/formats/vector_arith.isa @@ -0,0 +1,1319 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +let {{ + def setDestWrapper(destRegId): + return "setDestRegIdx(_numDestRegs++, " + destRegId + ");\n" + \ + "_numTypedDestRegs[VecRegClass]++;\n" + def setSrcWrapper(srcRegId): + return "setSrcRegIdx(_numSrcRegs++, " + srcRegId + ");\n" + def setSrcVm(): + return "if (!this->vm)\n" + \ + " setSrcRegIdx(_numSrcRegs++, vecRegClass[0]);" + def vmDeclAndReadData(): + return ''' + [[maybe_unused]] RiscvISA::vreg_t tmp_v0; + [[maybe_unused]] uint8_t* v0; + if(!machInst.vm) { + xc->getRegOperand(this, _numSrcRegs-1, &tmp_v0); + v0 = tmp_v0.as(); + } + ''' + def copyOldVd(vd_idx): + return 'COPY_OLD_VD(%d);' % vd_idx + def loopWrapper(code, micro_inst = True): + if micro_inst: + upper_bound = "this->microVl" + else: + upper_bound = "(uint32_t)machInst.vl" + return ''' + for (uint32_t i = 0; i < %s; i++) { + %s + } + ''' % (upper_bound, code) + def maskCondWrapper(code): + return "if (this->vm || elem_mask(v0, ei)) {\n" + \ + code + "}\n" + def eiDeclarePrefix(code, widening = False): + if widening: + return ''' + uint32_t ei = i + micro_vlmax * this->microIdx; + ''' + code + else: + return ''' + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + ''' + code + + def wideningOpRegisterConstraintChecks(code): + return ''' + const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1); + if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) { + std::string error = + csprintf("Unaligned Vd group in Widening op"); + return std::make_shared(error, machInst); + } + if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) { + // A destination vector register group can overlap a source vector + // register group if The destination EEW is greater than the source + // EEW, the source EMUL is at least 1, and the overlap is in the + // highest- numbered part of the destination register group. + std::string error = + csprintf("Unsupported overlap in Vs2 and Vd for Widening op"); + return std::make_shared(error, machInst); + } + ''' + code + + def narrowingOpRegisterConstraintChecks(code): + return ''' + const uint32_t num_microops = 1 << std::max(0, vtype_vlmul(machInst.vtype8) + 1); + if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) { + std::string error = + csprintf("Unaligned VS2 group in Narrowing op"); + return std::make_shared(error, machInst); + } + if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (VS2 + num_microops - 1))) { + // A destination vector register group can overlap a source vector + // register group The destination EEW is smaller than the source EEW + // and the overlap is in the lowest-numbered part of the source + // register group + std::string error = + csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op"); + return std::make_shared(error, machInst); + } + ''' + code + + def fflags_wrapper(code): + return ''' + RegVal FFLAGS = xc->readMiscReg(MISCREG_FFLAGS); + std::feclearexcept(FE_ALL_EXCEPT); + ''' + code + ''' + FFLAGS |= softfloat_exceptionFlags; + softfloat_exceptionFlags = 0; + xc->setMiscReg(MISCREG_FFLAGS, FFLAGS); + ''' +}}; + + +def format VectorIntFormat(code, category, *flags) {{ + macroop_class_name = 'VectorArithMacroInst' + microop_class_name = 'VectorArithMicroInst' + + if name == "vid_v" : + macroop_class_name = 'VectorVMUNARY0MacroInst' + microp_class_name = 'VectorVMUNARY0MicroInst' + + iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = inst_name not in ["vmv"] + mask_cond = v0_required and (inst_suffix not in ['vvm', 'vxm', 'vim']) + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + num_src_regs = 0 + + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + num_src_regs += 1 + + src1_reg_id = "" + if category in ["OPIVV", "OPMVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + num_src_regs += 1 + elif category in ["OPIVX", "OPMVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + num_src_regs += 1 + elif category == "OPIVI": + pass + else: + error("not supported category for VectorIntFormat: %s" % category) + + old_vd_idx = num_src_regs + src3_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + microop_class_name, + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntMicroDeclare.subst(microiop) + \ + VectorIntMicroConstructor.subst(microiop) + \ + VectorIntMicroExecute.subst(microiop) + \ + VectorIntMacroDeclare.subst(iop) + \ + VectorIntMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + + +def format VectorIntExtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + ext_div = int(inst_suffix[-1]) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / " + \ + str(ext_div) + "]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'ext_div': ext_div}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntExtMicroDeclare.subst(microiop) + \ + VectorIntMicroConstructor.subst(microiop) + \ + VectorIntExtMicroExecute.subst(microiop) + \ + VectorIntExtMacroDeclare.subst(iop) + \ + VectorIntMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorIntWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = True + mask_cond = v0_required + need_elem_idx = mask_cond or code.find("ei") != -1 + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPIVV", "OPMVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPIVX", "OPMVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "" + if inst_suffix in ["vv", "vx"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + elif inst_suffix in ["wv", "wx"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + + code = wideningOpRegisterConstraintChecks(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorIntWideningMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntNarrowingFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + mask_cond = True + need_elem_idx = True + + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + # code + code = maskCondWrapper(code) + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + code = narrowingOpRegisterConstraintChecks(code) + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + }, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorIntNarrowingMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntMaskFormat(code, category, *flags) {{ + iop = InstObjParams(name, + Name, + 'VectorArithMacroInst', + {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = not (inst_name in ["vmadc", "vmsbc"] \ + and inst_suffix in ["vv", "vx", "vi"]) + mask_cond = inst_name not in ['vmadc', 'vmsbc'] + need_elem_idx = mask_cond or code.find("ei") != -1 + + old_vd_idx = 2 + dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" + src1_reg_id = "" + if category == "OPIVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPIVX": + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + #code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntMaskMicroDeclare.subst(microiop) + \ + VectorIntMaskMicroConstructor.subst(microiop) + \ + VectorIntMaskMicroExecute.subst(microiop) + \ + VectorIntMaskMacroDeclare.subst(iop) + \ + VectorIntMaskMacroConstructor.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorGatherFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + if inst_name == "vrgatherei16": + idx_type = "uint16_t" + else: + idx_type = "elem_type" + iop = InstObjParams(name, Name, 'VectorArithMacroInst', + {'idx_type': idx_type, + 'code': code}, + flags) + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + vd_idx]" + src1_reg_id = "" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + vs1_idx]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + vd_idx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + + # code + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx), + 'idx_type': idx_type}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorGatherMicroDeclare.subst(microiop) + \ + VectorGatherMicroConstructor.subst(microiop) + \ + VectorGatherMicroExecute.subst(microiop) + \ + VectorGatherMacroDeclare.subst(iop) + \ + VectorGatherMacroConstructor.subst(iop) + + decode_block = VectorGatherDecodeBlock.subst(iop) + +}}; + +def format VectorFloatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = inst_name not in ["vfmv"] + mask_cond = v0_required and (inst_suffix not in ['vvm', 'vfm']) + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category == "OPFVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPFVF": + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatMicroExecute.subst(microiop) + \ + VectorFloatMacroDeclare.subst(iop) + \ + VectorFloatMacroConstructor.subst(iop) + + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorFloatCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorFloatMacroConstructor.subst(iop) + + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorFloatWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + v0_required = True + mask_cond = v0_required + need_elem_idx = mask_cond or code.find("ei") != -1 + + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPFVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]" + elif category in ["OPFVF"]: + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "" + if inst_suffix in ["vv", "vf"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + elif inst_suffix in ["wv", "wf"]: + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + if v0_required: + set_src_reg_idx += setSrcVm() + + # code + if mask_cond: + code = maskCondWrapper(code) + if need_elem_idx: + code = eiDeclarePrefix(code, widening=True) + code = loopWrapper(code) + code = fflags_wrapper(code) + + code = wideningOpRegisterConstraintChecks(code) + + vm_decl_rd = "" + if v0_required: + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntWideningMicroDeclare.subst(microiop) + \ + VectorIntWideningMicroConstructor.subst(microiop) + \ + VectorFloatWideningMicroExecute.subst(microiop) + \ + VectorIntWideningMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatWideningCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatWideningMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + old_vd_idx = 1 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx / 2]" + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + code = narrowingOpRegisterConstraintChecks(code) + + vm_decl_rd = vmDeclAndReadData() + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatCvtMicroDeclare.subst(microiop) + \ + VectorFloatMicroConstructor.subst(microiop) + \ + VectorFloatNarrowingMicroExecute.subst(microiop) + \ + VectorFloatCvtMacroDeclare.subst(iop) + \ + VectorIntWideningMacroConstructor.subst(iop) + + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorFloatMaskFormat(code, category, *flags) {{ + iop = InstObjParams(name, + Name, + 'VectorArithMacroInst', + {'code': code}, + flags) + dest_reg_id = "vecRegClass[VecMemInternalReg0 + _microIdx]" + src1_reg_id = "" + if category == "OPFVV": + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category == "OPFVF": + src1_reg_id = "floatRegClass[_machInst.rs1]" + else: + error("not supported category for VectorFloatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + code = fflags_wrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorFloatMaskMicroDeclare.subst(microiop) + \ + VectorFloatMaskMicroConstructor.subst(microiop) + \ + VectorFloatMaskMicroExecute.subst(microiop) + \ + VectorFloatMaskMacroDeclare.subst(iop) + \ + VectorFloatMaskMacroConstructor.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VMvWholeFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VMvWholeMacroInst', {'code': code}, flags) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VMvWholeMicroInst', + {'code': code}, + flags) + + header_output = \ + VMvWholeMacroDeclare.subst(iop) + \ + VMvWholeMicroDeclare.subst(microiop) + decoder_output = \ + VMvWholeMacroConstructor.subst(iop) + \ + VMvWholeMicroConstructor.subst(microiop) + exec_output = VMvWholeMicroExecute.subst(microiop) + decode_block = BasicDecode.subst(iop) +}}; + +def format ViotaFormat(code, category, *flags){{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + old_dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + + microiop = InstObjParams(name+"_micro", + Name+"Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'copy_old_vd': copyOldVd(1)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + ViotaMicroDeclare.subst(microiop) + \ + ViotaMicroConstructor.subst(microiop) + \ + ViotaMicroExecute.subst(microiop)+\ + ViotaMacroDeclare.subst(iop) + \ + ViotaMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) + +}}; + +def format Vector1Vs1VdMaskFormat(code, category, *flags){{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src2_reg_id = "vecRegClass[_machInst.vs2]" + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx, + 'copy_old_vd': copyOldVd(1)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + Vector1Vs1RdMaskDeclare.subst(iop) + \ + Vector1Vs1VdMaskConstructor.subst(iop) + \ + Vector1Vs1VdMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format Vector1Vs1RdMaskFormat(code, category, *flags){{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + vm_decl_rd = vmDeclAndReadData() + set_vm_idx = setSrcVm() + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + Vector1Vs1RdMaskDeclare.subst(iop) + \ + Vector1Vs1RdMaskConstructor.subst(iop) + \ + Vector1Vs1RdMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format VectorNonSplitFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + vm_decl_rd = "" + + set_vm_idx = "" + + if inst_name == "vfmv" : + code = fflags_wrapper(code) + + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'vm_decl_rd': vm_decl_rd, + 'set_vm_idx': set_vm_idx}, + flags) + + + if inst_name == "vfmv" : + execute_block = VectorFloatNonSplitExecute.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) + elif inst_name == "vmv" : + execute_block = VectorIntNonSplitExecute.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) + else : + error("Unsupported inst for VectorNonSplitFormat: %s" % inst_name) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorNonSplitDeclare.subst(iop) + \ + VectorNonSplitConstructor.subst(iop) + \ + execute_block + +}}; + +def format VectorMaskFormat(code, category, *flags) {{ + inst_name, inst_suffix = name.split("_", maxsplit=1) + old_vd_idx = 2 + if category not in ["OPMVV"]: + error("not supported category for VectorIntFormat: %s" % category) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2]" + + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + # TODO: remove it + old_dest_reg_id = "vecRegClass[_machInst.vd]" + + set_src_reg_idx = "" + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + code = loopWrapper(code, micro_inst = False) + + iop = InstObjParams(name, + Name, + 'VectorNonSplitInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorMaskDeclare.subst(iop) + \ + VectorMaskConstructor.subst(iop) + \ + VectorMaskExecute.subst(iop) + + decode_block = VectorMaskDecodeBlock.subst(iop) +}}; + +def format VectorReduceIntFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + ''' + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceIntMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorReduceFloatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using et = ElemType; + using vu = decltype(et::v); + ''' + + code = fflags_wrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceFloatMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorFloatDecodeBlock.subst(iop) +}}; + +def format VectorReduceFloatWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + type_def = ''' + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + ''' + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'type_def': type_def, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceFloatWideningMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorFloatWideningDecodeBlock.subst(iop) +}}; + +def format VectorIntVxsatFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + old_vd_idx = 2 + dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]" + src1_reg_id = "" + if category in ["OPIVV"]: + src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx]" + elif category in ["OPIVX"]: + src1_reg_id = "intRegClass[_machInst.rs1]" + elif category == "OPIVI": + old_vd_idx = 1 + else: + error("not supported category for VectorIntVxsatFormat: %s" % category) + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + src3_reg_id = "vecRegClass[_machInst.vs3 + _microIdx]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + + set_src_reg_idx = "" + if category != "OPIVI": + set_src_reg_idx += setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + set_src_reg_idx += setSrcWrapper(src3_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + + code = maskCondWrapper(code) + code = eiDeclarePrefix(code) + code = loopWrapper(code) + + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorIntVxsatMicroDeclare.subst(microiop) + \ + VectorIntVxsatMicroConstructor.subst(microiop) + \ + VectorIntMicroExecute.subst(microiop) + \ + VectorIntVxsatMacroDeclare.subst(iop) + \ + VectorIntVxsatMacroConstructor.subst(iop) + + decode_block = VectorIntDecodeBlock.subst(iop) +}}; + +def format VectorReduceIntWideningFormat(code, category, *flags) {{ + iop = InstObjParams(name, Name, 'VectorArithMacroInst', {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd]" + src1_reg_id = "vecRegClass[_machInst.vs1]" + src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]" + old_dest_reg_id = "vecRegClass[_machInst.vd]" + set_dest_reg_idx = setDestWrapper(dest_reg_id) + set_src_reg_idx = setSrcWrapper(src1_reg_id) + set_src_reg_idx += setSrcWrapper(src2_reg_id) + # Treat tail undisturbed/agnostic as the same + # We always need old rd as src vreg + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_src_reg_idx += setSrcVm() + vm_decl_rd = vmDeclAndReadData() + microiop = InstObjParams(name + "_micro", + Name + "Micro", + 'VectorArithMicroInst', + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(2)}, + flags) + + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorReduceMicroDeclare.subst(microiop) + \ + VectorReduceMicroConstructor.subst(microiop) + \ + VectorReduceIntWideningMicroExecute.subst(microiop) + \ + VectorReduceMacroDeclare.subst(iop) + \ + VectorReduceMacroConstructor.subst(iop) + decode_block = VectorIntWideningDecodeBlock.subst(iop) +}}; + +let {{ + +def VectorSlideBase(name, Name, category, code, flags, macro_construtor, + decode_template, micro_execute_template): + macroop_class_name = 'VectorSlideMacroInst' + microop_class_name = 'VectorSlideMicroInst' + # Make sure flags are in lists (convert to lists if not). + flags = makeList(flags) + iop = InstObjParams(name, Name, macroop_class_name, {'code': code}, + flags) + inst_name, inst_suffix = name.split("_", maxsplit=1) + dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" + src2_reg_id = "vecRegClass[_machInst.vs2 + vs2Idx]" + src1_ireg_id = "intRegClass[_machInst.rs1]" + src1_freg_id = "floatRegClass[_machInst.rs1]" + + # The tail of vector mask inst should be treated as tail-agnostic. + # We treat it with tail-undisturbed policy, since + # the test suits only support undisturbed policy. + num_src_regs = 0 + + old_dest_reg_id = "vecRegClass[_machInst.vd + vdIdx]" + set_src_reg_idx = "" + if category in ["OPIVX", "OPMVX"]: + set_src_reg_idx += setSrcWrapper(src1_ireg_id) + num_src_regs += 1 + elif category in ["OPFVF"]: + set_src_reg_idx += setSrcWrapper(src1_freg_id) + num_src_regs += 1 + set_src_reg_idx += setSrcWrapper(src2_reg_id) + num_src_regs += 1 + old_vd_idx = num_src_regs + set_src_reg_idx += setSrcWrapper(old_dest_reg_id) + set_dest_reg_idx = setDestWrapper(dest_reg_id) + vm_decl_rd = vmDeclAndReadData() + set_src_reg_idx += setSrcVm() + microiop = InstObjParams(name + "_micro", + Name + "Micro", + microop_class_name, + {'code': code, + 'set_dest_reg_idx': set_dest_reg_idx, + 'set_src_reg_idx': set_src_reg_idx, + 'vm_decl_rd': vm_decl_rd, + 'copy_old_vd': copyOldVd(old_vd_idx)}, + flags) + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + # Because of the use of templates, we had to put all parts in header to + # keep the compiler happy. + header_output = \ + VectorSlideMicroDeclare.subst(microiop) + \ + VectorSlideMicroConstructor.subst(microiop) + \ + micro_execute_template.subst(microiop) + \ + VectorSlideMacroDeclare.subst(iop) + \ + macro_construtor.subst(iop) + + decode_block = decode_template.subst(iop) + return (header_output, decode_block) + +}}; + +def format VectorSlideUpFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) +}}; + +def format VectorSlideDownFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorIntDecodeBlock, + micro_execute_template = VectorSlideMicroExecute) +}}; + +def format VectorFloatSlideUpFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideUpMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) +}}; + +def format VectorFloatSlideDownFormat(code, category, *flags) {{ + (header_output, decode_block) = VectorSlideBase(name, Name, category, code, + flags, + macro_construtor = VectorSlideDownMacroConstructor, + decode_template = VectorFloatDecodeBlock, + micro_execute_template = VectorFloatSlideMicroExecute) +}}; diff --git a/src/arch/riscv/isa/templates/templates.isa b/src/arch/riscv/isa/templates/templates.isa index b4de46d846..ed3f5287c0 100644 --- a/src/arch/riscv/isa/templates/templates.isa +++ b/src/arch/riscv/isa/templates/templates.isa @@ -1,2 +1,32 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + // Include ##include "vector_mem.isa" +##include "vector_arith.isa" diff --git a/src/arch/riscv/isa/templates/vector_arith.isa b/src/arch/riscv/isa/templates/vector_arith.isa new file mode 100644 index 0000000000..d15ab70f20 --- /dev/null +++ b/src/arch/riscv/isa/templates/vector_arith.isa @@ -0,0 +1,1989 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +output header {{ + +#define ASSIGN_VD_BIT(idx, bit) \ + ((Vd[(idx)/8] & ~(1 << (idx)%8)) | ((bit) << (idx)%8)) + +#define COPY_OLD_VD(idx) \ + [[maybe_unused]] RiscvISA::vreg_t old_vd; \ + [[maybe_unused]] decltype(Vd) old_Vd = nullptr; \ + xc->getRegOperand(this, (idx), &old_vd); \ + old_Vd = old_vd.as >(); \ + memcpy(Vd, old_Vd, VLENB); + +#define VRM_REQUIRED \ + uint_fast8_t frm = xc->readMiscReg(MISCREG_FRM); \ + if (frm > 4) \ + return std::make_shared("RM fault", machInst); \ + softfloat_roundingMode = frm; + +template +bool inline +carry_out(Type a, Type b, bool carry_in = false) { + using TypeU = std::make_unsigned_t; + TypeU s = *reinterpret_cast(&a) + + *reinterpret_cast(&b) + carry_in; + return carry_in + ? (s <= *reinterpret_cast(&a)) + : (s < *reinterpret_cast(&a)); +} + +template +bool inline +borrow_out(Type a, Type b, bool borrow_in = false) { + using TypeU = std::make_unsigned_t; + return borrow_in + ? (*reinterpret_cast(&a) <= *reinterpret_cast(&b)) + : (*reinterpret_cast(&a) < *reinterpret_cast(&b)); +} + +}}; + +def template VectorIntMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorIntMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx + // vs2, (old_vd), vm for *.vi + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorIntExtMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorIntExtMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorIntExtMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + auto SEW = vtype_SEW(vtype); + auto offset = (VLEN / SEW) * (microIdx % %(ext_div)d); + switch (SEW / %(ext_div)d) { + case 8: { + using vext [[maybe_unused]] = int8_t; + using vextu [[maybe_unused]] = uint8_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + case 16: { + using vext [[maybe_unused]] = int16_t; + using vextu [[maybe_unused]] = uint16_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + case 32: { + using vext [[maybe_unused]] = int32_t; + using vextu [[maybe_unused]] = uint32_t; + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + break; + } + default: break; + } + + return NoFault; +} + +}}; + +def template VectorIntDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntWideningMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntWideningMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const int64_t vlmul = vtype_vlmul(_machInst.vtype8); + // Todo: move to Decode template + panic_if(vlmul == 3, "LMUL=8 is illegal for widening inst"); + // when LMUL setted as m1, need to split to 2 micro insts + const uint32_t num_microops = 1 << std::max(0, vlmul + 1); + + int32_t tmp_vl = this->vl; + const int32_t t_micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorIntWideningMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm for *.vv, *.vx + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntWideningMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorIntNarrowingMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorIntWideningDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b000: return new %(class_name)s(machInst); +case 0b001: return new %(class_name)s(machInst); +case 0b010: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorFloatMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMacroConstructor {{ +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorFloatMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1, vs2, vs3(old_vd), vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMicroConstructor {{ +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorFloatMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorFloatDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +case 0b011: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorFloatCvtMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + +def template VectorFloatCvtMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + std::string generateDisassembly(Addr pc, + const loader::SymbolTable *symtab) const override + { + std::stringstream ss; + ss << mnemonic << ' ' << registerName(destRegIdx(0)) << ", " + << registerName(srcRegIdx(0)); + if (machInst.vm == 0) ss << ", v0.t"; + return ss.str(); + } +}; + +}}; + + +def template VectorFloatWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatNarrowingMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu [[maybe_unused]] = decltype(et::v); + using ewt = typename double_width::type; + using vwu = decltype(ewt::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + VRM_REQUIRED; + + const int64_t vlmul = vtype_vlmul(machInst.vtype8); + const int32_t t_micro_vlmax = vtype_VLMAX(machInst.vtype8, true); + const int32_t micro_vlmax = vlmul < 0 ? t_micro_vlmax : t_micro_vlmax / 2; + [[maybe_unused]] const size_t offset = + (this->microIdx % 2 == 0) ? 0 : micro_vlmax; + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatWideningDecodeBlock {{ + +switch(machInst.vtype8.vsew) { +case 0b010: return new %(class_name)s(machInst); +default: GEM5_UNREACHABLE; +} + +}}; + +def template ViotaMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + int cnt = 0; + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + + +def template ViotaMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + + StaticInstPtr microop; + + // Allow one empty micro op to hold IsLastMicroop flag + for (int i = 0; i < num_microops && micro_vl >= 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i, + &cnt); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template ViotaMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; + int* cnt; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, int* cnt); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template ViotaMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, int* cnt) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + this->cnt = cnt; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2]); +} + +}}; + +def template ViotaMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + + +def template Vector1Vs1VdMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; + %(set_vm_idx)s; +} + +}}; + +def template Vector1Vs1VdMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = uint8_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + return NoFault; +}; + +}}; + +def template Vector1Vs1RdMaskDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[2]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template Vector1Vs1RdMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + %(constructor)s; + %(set_vm_idx)s; +} + +}}; + +def template Vector1Vs1RdMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_rd)s; + uint64_t Rd = 0; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +}; + +}}; + +def template VectorIntMaskMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMaskMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size()); + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorIntMaskMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1(rs1), vs2, old_vd, v0 for *.vv[m] or *.vx[m] + // vs2, old_vd, v0 for *.vi[m] + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntMaskMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorIntMaskMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t offset = bit_offset * microIdx; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatMaskMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMaskMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + microop = new VMaskMergeMicroInst(_machInst, _machInst.vd, + this->microops.size()); + this->microops.push_back(microop); + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorFloatMaskMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs1(rs1), vs2, old_vd, v0 for *.vv or *.vf + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorFloatMaskMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorFloatMaskMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + constexpr uint16_t bit_offset = VLENB / sizeof(ElemType); + const uint16_t offset = bit_offset * microIdx; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VMvWholeMacroDeclare {{ + +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMvWholeMacroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = _machInst.simm3 + 1; + StaticInstPtr microop; + + for (int i = 0; i < num_microops; ++i) { + microop = new %(class_name)sMicro(_machInst, 0, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VMvWholeMicroDeclare {{ + +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[1]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VMvWholeMicroConstructor {{ + +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + setDestRegIdx(_numDestRegs++, vecRegClass[_machInst.vd + _microIdx]); + _numTypedDestRegs[VecRegClass]++; + setSrcRegIdx(_numSrcRegs++, vecRegClass[_machInst.vs2 + _microIdx]); +} + +}}; + +def template VMvWholeMicroExecute {{ + +Fault +%(class_name)s::execute(ExecContext* xc, trace::InstRecord* traceData) const +{ + // TODO: Check register alignment. + // TODO: If vd is equal to vs2 the instruction is an architectural NOP. + %(op_decl)s; + %(op_rd)s; + for (size_t i = 0; i < (VLEN / 64); i++) { + %(code)s; + } + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorMaskDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[3]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorMaskConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorMaskExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu = uint8_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + // TODO: remove it + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; + +def template VectorMaskDecodeBlock {{ + +return new %(class_name)s(machInst); + +}}; + +def template VectorNonSplitDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + RegId srcRegIdxArr[2]; + RegId destRegIdxArr[1]; +public: + %(class_name)s(ExtMachInst _machInst); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorNonSplitConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + %(set_vm_idx)s; +} + +}}; + +def template VectorIntNonSplitExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorFloatNonSplitExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorReduceMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, micro_vl, i); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorReduceMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorReduceMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorReduceIntMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + ElemType microop_result = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + microop_result = f(microop_result, Vs2[i]); + } + } + return microop_result; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceFloatMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]).v; + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorReduceFloatWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + %(type_def)s; + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vwu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]).v; + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorGatherMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s{ +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorGatherMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + constexpr uint32_t vd_eewb = sizeof(ElemType); + constexpr uint32_t vs2_eewb = sizeof(ElemType); + constexpr uint32_t vs1_eewb = sizeof(IndexType); + constexpr bool vs1_split = vd_eewb > vs1_eewb; + const int8_t lmul = vtype_vlmul(vtype); + const int8_t vs1_emul = lmul + + (vs1_split ? -(vs2_eewb / vs1_eewb) : vs1_eewb / vs2_eewb); + const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; + const uint8_t vs1_vregs = vs1_emul < 0 ? 1 : 1 << vs1_emul; + const uint8_t vd_vregs = vs2_vregs; + const int32_t micro_vlmax = VLENB / std::max(vd_eewb, vs1_eewb); + int32_t remaining_vl = this->vl; + int32_t micro_vl = std::min(remaining_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (uint8_t i = 0; i < std::max(vs1_vregs, vd_vregs) && micro_vl > 0; + i++) { + for (uint8_t j = 0; j < vs2_vregs; j++) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, i * vs2_vregs + j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(remaining_vl -= micro_vlmax, micro_vlmax); + } + + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorGatherMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vd, vm + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorGatherMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx) +: %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + [[maybe_unused]] constexpr uint32_t vd_eewb = sizeof(ElemType); + [[maybe_unused]] constexpr uint32_t vs2_eewb = sizeof(ElemType); + [[maybe_unused]] constexpr uint32_t vs1_eewb = sizeof(IndexType); + constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; + constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; + const int8_t lmul = vtype_vlmul(vtype); + const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; + [[maybe_unused]] const uint8_t vs2_idx = _microIdx % vs2_vregs; + [[maybe_unused]] const uint8_t vs1_idx = + _microIdx / vs2_vregs / vs1_split_num; + [[maybe_unused]] const uint8_t vd_idx = + _microIdx / vs2_vregs / vd_split_num; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorGatherMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + [[maybe_unused]] constexpr size_t sew = sizeof(vu) * 8; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + const uint32_t vlmax = vtype_VLMAX(vtype); + constexpr uint8_t vd_eewb = sizeof(ElemType); + constexpr uint8_t vs1_eewb = sizeof(IndexType); + constexpr uint8_t vs2_eewb = sizeof(ElemType); + constexpr uint8_t vs1_split_num = (vd_eewb + vs1_eewb - 1) / vs1_eewb; + constexpr uint8_t vd_split_num = (vs1_eewb + vd_eewb - 1) / vd_eewb; + [[maybe_unused]] constexpr uint16_t vd_elems = VLENB / vd_eewb; + [[maybe_unused]] constexpr uint16_t vs1_elems = VLENB / vs1_eewb; + [[maybe_unused]] constexpr uint16_t vs2_elems = VLENB / vs2_eewb; + [[maybe_unused]] const int8_t lmul = vtype_vlmul(vtype); + [[maybe_unused]] const uint8_t vs2_vregs = lmul < 0 ? 1 : 1 << lmul; + [[maybe_unused]] const uint8_t vs2_idx = microIdx % vs2_vregs; + [[maybe_unused]] const uint8_t vs1_idx = + microIdx / vs2_vregs / vs1_split_num; + [[maybe_unused]] const uint8_t vd_idx = + microIdx / vs2_vregs / vd_split_num; + [[maybe_unused]] const uint16_t vs1_bias = + vs1_elems * (vd_idx % vs1_split_num) / vs1_split_num; + [[maybe_unused]] const uint16_t vd_bias = + vd_elems * (vs1_idx % vd_split_num) / vd_split_num; + + %(code)s; + %(op_wb)s; + + return NoFault; +} + +}}; + +def template VectorGatherDecodeBlock {{ + +switch(machInst.vtype8.vsew) { + case 0b000: { + using elem_type [[maybe_unused]] = uint8_t; + return new %(class_name)s(machInst); + } + case 0b001: { + using elem_type [[maybe_unused]] = uint16_t; + return new %(class_name)s(machInst); + } + case 0b010: { + using elem_type [[maybe_unused]] = uint32_t; + return new %(class_name)s(machInst); + } + case 0b011: { + using elem_type [[maybe_unused]] = uint64_t; + return new %(class_name)s(machInst); + } + default: GEM5_UNREACHABLE; +} + +}}; + +def template VectorIntVxsatMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s{ +private: + %(reg_idx_arr_decl)s; + bool vxsat = false; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntVxsatMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + microop = new %(class_name)sMicro(_machInst, + micro_vl, i, &vxsat); + microop->setDelayedCommit(); + this->microops.push_back(microop); + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + + microop = new VxsatMicroInst(&vxsat, _machInst); + microop->setFlag(StaticInst::IsSerializeAfter); + microop->setFlag(StaticInst::IsNonSpeculative); + this->microops.push_back(microop); + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} +}}; + +def template VectorIntVxsatMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; + bool* vxsatptr; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, bool* vxsatptr); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorIntVxsatMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, bool* vxsatptr) + : %(base_class)s("%(mnemonic)s", _machInst, + %(op_class)s, _microVl, _microIdx) +{ + this->vm = _machInst.vm; + this->vxsatptr = vxsatptr; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorReduceIntWideningMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + using vwu [[maybe_unused]] = typename double_width::type; + using vwi [[maybe_unused]] = typename double_width::type; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + + Vd[0] = this->microIdx != 0 ? old_Vd[0] : Vs1[0]; + + auto reduce_loop = + [&, this](const auto& f, const auto* _, const auto* vs2) { + vwu tmp_val = Vd[0]; + for (uint32_t i = 0; i < this->microVl; i++) { + uint32_t ei = i + vtype_VLMAX(vtype, true) * this->microIdx; + if (this->vm || elem_mask(v0, ei)) { + tmp_val = f(tmp_val, Vs2[i]); + } + } + return tmp_val; + }; + + %(code)s; + %(op_wb)s; + return NoFault; +} + +}}; + +def template VectorSlideMacroDeclare {{ + +template +class %(class_name)s : public %(base_class)s { +private: + %(reg_idx_arr_decl)s; +public: + %(class_name)s(ExtMachInst _machInst); + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorSlideUpMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + // Todo static filter out useless uop + int micro_idx = 0; + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + for (int j = 0; j <= i; ++j) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, micro_idx++, i, j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorSlideDownMacroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s) +{ + %(set_reg_idx_arr)s; + %(constructor)s; + const uint32_t num_microops = vtype_regs_per_group(vtype); + int32_t tmp_vl = this->vl; + const int32_t micro_vlmax = vtype_VLMAX(_machInst.vtype8, true); + int32_t micro_vl = std::min(tmp_vl, micro_vlmax); + StaticInstPtr microop; + + if (micro_vl == 0) { + microop = new VectorNopMicroInst(_machInst); + this->microops.push_back(microop); + } + // Todo static filter out useless uop + int micro_idx = 0; + for (int i = 0; i < num_microops && micro_vl > 0; ++i) { + for (int j = i; j < num_microops; ++j) { + microop = new %(class_name)sMicro( + _machInst, micro_vl, micro_idx++, i, j); + microop->setDelayedCommit(); + this->microops.push_back(microop); + } + micro_vl = std::min(tmp_vl -= micro_vlmax, micro_vlmax); + } + this->microops.front()->setFirstMicroop(); + this->microops.back()->setLastMicroop(); +} + +}}; + +def template VectorSlideMicroDeclare {{ + +template +class %(class_name)s : public %(base_class)s +{ +private: + // vs2, vs1, vs3(old_vd), vm for *.vv, *.vx + // vs2, (old_vd), vm for *.vi + RegId srcRegIdxArr[4]; + RegId destRegIdxArr[1]; + bool vm; +public: + %(class_name)s(ExtMachInst _machInst, uint8_t _microVl, + uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx); + Fault execute(ExecContext* xc, trace::InstRecord* traceData)const override; + using %(base_class)s::generateDisassembly; +}; + +}}; + +def template VectorSlideMicroConstructor {{ + +template +%(class_name)s::%(class_name)s(ExtMachInst _machInst, + uint8_t _microVl, uint8_t _microIdx, uint8_t _vdIdx, uint8_t _vs2Idx) + : %(base_class)s("%(mnemonic)s", _machInst, %(op_class)s, _microVl, + _microIdx, _vdIdx, _vs2Idx) +{ + this->vm = _machInst.vm; + %(set_reg_idx_arr)s; + _numSrcRegs = 0; + _numDestRegs = 0; + %(set_dest_reg_idx)s; + %(set_src_reg_idx)s; +} + +}}; + +def template VectorSlideMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using vu [[maybe_unused]] = std::make_unsigned_t; + using vi [[maybe_unused]] = std::make_signed_t; + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; + +def template VectorFloatSlideMicroExecute {{ + +template +Fault +%(class_name)s::execute(ExecContext* xc, + trace::InstRecord* traceData) const +{ + using et = ElemType; + using vu = decltype(et::v); + + if (machInst.vill) + return std::make_shared("VILL is set", machInst); + [[maybe_unused]]const uint32_t vlmax = vtype_VLMAX(vtype); + + %(op_decl)s; + %(op_rd)s; + %(vm_decl_rd)s; + %(copy_old_vd)s; + %(code)s; + %(op_wb)s; + + return NoFault; +}; + +}}; diff --git a/src/arch/riscv/isa/templates/vector_mem.isa b/src/arch/riscv/isa/templates/vector_mem.isa index d54243ad7d..f8be1e555b 100644 --- a/src/arch/riscv/isa/templates/vector_mem.isa +++ b/src/arch/riscv/isa/templates/vector_mem.isa @@ -1,3 +1,31 @@ +// -*- mode:c++ -*- + +// Copyright (c) 2022 PLCT Lab +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer; +// redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution; +// neither the name of the copyright holders nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + def template VMemMacroDeclare {{ class %(class_name)s : public %(base_class)s diff --git a/src/arch/riscv/regs/float.hh b/src/arch/riscv/regs/float.hh index 4809372070..cca9e1be2f 100644 --- a/src/arch/riscv/regs/float.hh +++ b/src/arch/riscv/regs/float.hh @@ -211,6 +211,20 @@ const std::vector RegNames = { } // namespace float_reg +inline float32_t +fsgnj32(float32_t a, float32_t b, bool n, bool x) { + if (n) b.v = ~b.v; + else if (x) b.v = a.v ^ b.v; + return f32(insertBits(b.v, 30, 0, a.v)); +} + +inline float64_t +fsgnj64(float64_t a, float64_t b, bool n, bool x) { + if (n) b.v = ~b.v; + else if (x) b.v = a.v ^ b.v; + return f64(insertBits(b.v, 62, 0, a.v)); +} + } // namespace RiscvISA } // namespace gem5 diff --git a/src/arch/riscv/utility.hh b/src/arch/riscv/utility.hh index 1db6d6df3b..40054aec0f 100644 --- a/src/arch/riscv/utility.hh +++ b/src/arch/riscv/utility.hh @@ -241,6 +241,13 @@ remu(T rs1, T rs2) return (rs2 == 0) ? rs1 : rs1 % rs2; } +// Vector extension functions +inline uint64_t +vtype_SEW(const uint64_t vtype) +{ + return 8 << bits(vtype, 5, 3); +} + /* * Encode LMUL to lmul as follows: * LMUL vlmul lmul @@ -269,6 +276,25 @@ vtype_VLMAX(const uint64_t vtype, const bool per_reg = false) return gem5::RiscvISA::VLEN >> (vsew + 3 - lmul); } +inline int64_t +vtype_vlmul(const uint64_t vtype) +{ + return (int64_t)sext<3>(bits(vtype, 2, 0)); +} + +inline uint64_t +vtype_regs_per_group(const uint64_t vtype) +{ + int64_t lmul = (int64_t)sext<3>(bits(vtype, 2, 0)); + return 1 << std::max(0, lmul); +} + +inline void +vtype_set_vill(uint64_t& vtype) +{ + vtype = (uint64_t)0 ^ (1UL << (sizeof(RegVal) * 8 - 1)); +} + inline uint64_t width_EEW(uint64_t width) { @@ -296,6 +322,461 @@ elem_mask(const T* vs, const int index) return (vs[idx] >> pos) & 1; } +template struct double_width; +template<> struct double_width { using type = uint16_t;}; +template<> struct double_width { using type = uint32_t;}; +template<> struct double_width { using type = uint64_t;}; +template<> struct double_width { using type = int16_t; }; +template<> struct double_width { using type = int32_t; }; +template<> struct double_width { using type = int64_t; }; +template<> struct double_width { using type = float64_t;}; + +template struct double_widthf; +template<> struct double_widthf { using type = float64_t;}; +template<> struct double_widthf { using type = float64_t;}; + +template auto +ftype(IntType a) -> FloatType +{ + if constexpr(std::is_same_v) + return f32(a); + else if constexpr(std::is_same_v) + return f64(a); + GEM5_UNREACHABLE; +} + +// TODO: Consolidate ftype_freg(freg_t a) and ftype(IntType a) into a +// single function +template auto +ftype_freg(freg_t a) -> FloatType +{ + if constexpr(std::is_same_v) + return f32(a); + else if constexpr(std::is_same_v) + return f64(a); + GEM5_UNREACHABLE; +} + +template FloatType +fadd(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_add(a, b); + else if constexpr(std::is_same_v) + return f64_add(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fsub(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_sub(a, b); + else if constexpr(std::is_same_v) + return f64_sub(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmin(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_min(a, b); + else if constexpr(std::is_same_v) + return f64_min(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmax(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_max(a, b); + else if constexpr(std::is_same_v) + return f64_max(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fdiv(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_div(a, b); + else if constexpr(std::is_same_v) + return f64_div(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmul(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_mul(a, b); + else if constexpr(std::is_same_v) + return f64_mul(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fsqrt(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_sqrt(a); + else if constexpr(std::is_same_v) + return f64_sqrt(a); + GEM5_UNREACHABLE; +} + +template FloatType +frsqrte7(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_rsqrte7(a); + else if constexpr(std::is_same_v) + return f64_rsqrte7(a); + GEM5_UNREACHABLE; +} + +template FloatType +frecip7(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_recip7(a); + else if constexpr(std::is_same_v) + return f64_recip7(a); + GEM5_UNREACHABLE; +} + +template FloatType +fclassify(FloatType a) +{ + if constexpr(std::is_same_v) + return f32(f32_classify(a)); + else if constexpr(std::is_same_v) + return f64(f64_classify(a)); + GEM5_UNREACHABLE; +} + +template FloatType +fsgnj(FloatType a, FloatType b, bool n, bool x) +{ + if constexpr(std::is_same_v) + return fsgnj32(a, b, n, x); + else if constexpr(std::is_same_v) + return fsgnj64(a, b, n, x); + GEM5_UNREACHABLE; +} + +template bool +fle(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_le(a, b); + else if constexpr(std::is_same_v) + return f64_le(a, b); + GEM5_UNREACHABLE; +} + +template bool +feq(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_eq(a, b); + else if constexpr(std::is_same_v) + return f64_eq(a, b); + GEM5_UNREACHABLE; +} + +template bool +flt(FloatType a, FloatType b) +{ + if constexpr(std::is_same_v) + return f32_lt(a, b); + else if constexpr(std::is_same_v) + return f64_lt(a, b); + GEM5_UNREACHABLE; +} + +template FloatType +fmadd(FloatType a, FloatType b, FloatType c) +{ + if constexpr(std::is_same_v) + return f32_mulAdd(a, b, c); + else if constexpr(std::is_same_v) + return f64_mulAdd(a, b, c); + GEM5_UNREACHABLE; +} + +template FloatType +fneg(FloatType a) +{ + if constexpr(std::is_same_v) + return f32(a.v ^ uint32_t(mask(31, 31))); + else if constexpr(std::is_same_v) + return f64(a.v ^ mask(63, 63)); + GEM5_UNREACHABLE; +} + +template::type> WFT +fwiden(FT a) +{ + if constexpr(std::is_same_v) + return f32_to_f64(a); + GEM5_UNREACHABLE; +} + +template IntType +f_to_ui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f32_to_ui32(a, mode, true); + else if constexpr(std::is_same_v) + return f64_to_ui64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> IntType +f_to_wui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f32_to_ui64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> IntType +f_to_nui(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return f64_to_ui32(a, mode, true); + GEM5_UNREACHABLE; +} + +template IntType +f_to_i(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint32_t)f32_to_i32(a, mode, true); + else if constexpr(std::is_same_v) + return (uint64_t)f64_to_i64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> IntType +f_to_wi(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint64_t)f32_to_i64(a, mode, true); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> IntType +f_to_ni(FloatType a, uint_fast8_t mode) +{ + if constexpr(std::is_same_v) + return (uint32_t)f64_to_i32(a, mode, true); + GEM5_UNREACHABLE; +} + +template +FloatType +ui_to_f(IntType a) +{ + if constexpr(std::is_same_v) + return ui32_to_f32(a); + else if constexpr(std::is_same_v) + return ui64_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> FloatType +ui_to_wf(IntType a) +{ + if constexpr(std::is_same_v) + return ui32_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = decltype(double_width::type::v) +> FloatType +ui_to_nf(IntType a) +{ + if constexpr(std::is_same_v) + return ui64_to_f32(a); + GEM5_UNREACHABLE; +} + +template +FloatType +i_to_f(IntType a) +{ + if constexpr(std::is_same_v) + return i32_to_f32((int32_t)a); + else if constexpr(std::is_same_v) + return i64_to_f64((int64_t)a); + GEM5_UNREACHABLE; +} + +template< + typename IntType, + typename FloatType = typename double_widthf::type +> FloatType +i_to_wf(IntType a) +{ + if constexpr(std::is_same_v) + return i32_to_f64((int32_t)a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename IntType = std::make_signed_t< + decltype(double_width::type::v) + > +> FloatType +i_to_nf(IntType a) +{ + if constexpr(std::is_same_v) + return i64_to_f32(a); + GEM5_UNREACHABLE; +} + +template< + typename FloatType, + typename FloatWType = typename double_width::type +> FloatWType +f_to_wf(FloatType a) +{ + if constexpr(std::is_same_v) + return f32_to_f64(a); + GEM5_UNREACHABLE; +} + +template< + typename FloatNType, + typename FloatType = typename double_width::type +> FloatNType +f_to_nf(FloatType a) +{ + if constexpr(std::is_same_v) + return f64_to_f32(a); + GEM5_UNREACHABLE; +} + +//ref: https://locklessinc.com/articles/sat_arithmetic/ +template T +sat_add(T x, T y, bool* sat) +{ + using UT = std::make_unsigned_t; + UT ux = x; + UT uy = y; + UT res = ux + uy; + + int sh = sizeof(T) * 8 - 1; + + ux = (ux >> sh) + (((UT)0x1 << sh) - 1); + + if ((T) ((ux ^ uy) | ~(uy ^ res)) >= 0) { + res = ux; + *sat = true; + } + return res; +} + +template T +sat_sub(T x, T y, bool* sat) +{ + using UT = std::make_unsigned_t; + UT ux = x; + UT uy = y; + UT res = ux - uy; + + int sh = sizeof(T) * 8 - 1; + + ux = (ux >> sh) + (((UT)0x1 << sh) - 1); + + if ((T) ((ux ^ uy) & (ux ^ res)) < 0) { + res = ux; + *sat = true; + } + return res; +} + +template T +sat_addu(T x, T y, bool* sat) +{ + T res = x + y; + + bool t = res < x; + if (false == *sat){ + *sat = t; + } + res |= -(res < x); + + return res; +} + +template T +sat_subu(T x, T y, bool* sat) +{ + T res = x - y; + + bool t = !(res <= x); + if (false == *sat){ + *sat = t; + } + + res &= -(res <= x); + + return res; +} + +/** + * Ref: + * https://github.com/riscv-software-src/riscv-isa-sim + */ +template T +int_rounding(T result, uint8_t xrm, unsigned gb) { + const uint64_t lsb = 1UL << gb; + const uint64_t lsb_half = lsb >> 1; + switch (xrm) { + case 0 /* RNU */: + result += lsb_half; + break; + case 1 /* RNE */: + if ((result & lsb_half) && + ((result & (lsb_half - 1)) || (result & lsb))) + result += lsb; + break; + case 2 /* RDN */: + break; + case 3 /* ROD */: + if (result & (lsb - 1)) + result |= lsb; + break; + default: + panic("Invalid xrm value %d", (int)xrm); + } + + return result; +} + } // namespace RiscvISA } // namespace gem5 From ae651f4de1a981a780bd450b031e6258ad022123 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Armejach?= Date: Fri, 21 Jul 2023 18:36:48 +0200 Subject: [PATCH 07/10] configs: update riscv restore checkpoint test Change-Id: I019fc6394a03196711ab52533ad8062b22c89daf --- .../gem5_library/checkpoints/riscv-hello-restore-checkpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py index 9f9bf839a6..eed76e2448 100644 --- a/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py +++ b/configs/example/gem5_library/checkpoints/riscv-hello-restore-checkpoint.py @@ -90,7 +90,7 @@ board = SimpleBoard( board.set_se_binary_workload( # the workload should be the same as the save-checkpoint script obtain_resource("riscv-hello"), - checkpoint=obtain_resource("riscv-hello-example-checkpoint-v23"), + checkpoint=obtain_resource("riscv-hello-example-checkpoint"), ) simulator = Simulator( From af1b2ec2d5768a08bd42f11466be991302149482 Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Fri, 28 Jul 2023 09:44:28 -0700 Subject: [PATCH 08/10] arch-riscv: Add fatal if RVV used with o3 or minor Since the O3 and Minor CPU models do not support RVV right now as the implementation stalls the decode until vsetvl instructions are exectued, this change calls `fatal` if RVV is not explicitly enabled. It is possible to override this if you explicitly enable RVV in the config file. Change-Id: Ia801911141bb2fb2bedcff3e139bf41ba8936085 Signed-off-by: Jason Lowe-Power --- src/arch/riscv/RiscvCPU.py | 15 +++++++++++++-- src/arch/riscv/RiscvDecoder.py | 1 + src/arch/riscv/RiscvISA.py | 2 ++ src/arch/riscv/decoder.cc | 12 ++++++++++++ src/arch/riscv/decoder.hh | 6 ++---- src/arch/riscv/isa.cc | 9 ++++++--- src/arch/riscv/isa.hh | 3 +++ 7 files changed, 39 insertions(+), 9 deletions(-) diff --git a/src/arch/riscv/RiscvCPU.py b/src/arch/riscv/RiscvCPU.py index 1c77045c67..449bf5e7af 100644 --- a/src/arch/riscv/RiscvCPU.py +++ b/src/arch/riscv/RiscvCPU.py @@ -41,6 +41,17 @@ class RiscvCPU: ArchISA = RiscvISA +class RiscvISANoRVV(RiscvISA): + enable_rvv = False + + +class RiscvCPUNoRVV: + ArchDecoder = RiscvDecoder + ArchMMU = RiscvMMU + ArchInterrupts = RiscvInterrupts + ArchISA = RiscvISANoRVV + + class RiscvAtomicSimpleCPU(BaseAtomicSimpleCPU, RiscvCPU): mmu = RiscvMMU() @@ -53,9 +64,9 @@ class RiscvTimingSimpleCPU(BaseTimingSimpleCPU, RiscvCPU): mmu = RiscvMMU() -class RiscvO3CPU(BaseO3CPU, RiscvCPU): +class RiscvO3CPU(BaseO3CPU, RiscvCPUNoRVV): mmu = RiscvMMU() -class RiscvMinorCPU(BaseMinorCPU, RiscvCPU): +class RiscvMinorCPU(BaseMinorCPU, RiscvCPUNoRVV): mmu = RiscvMMU() diff --git a/src/arch/riscv/RiscvDecoder.py b/src/arch/riscv/RiscvDecoder.py index 30c1077662..4100a3c5b3 100644 --- a/src/arch/riscv/RiscvDecoder.py +++ b/src/arch/riscv/RiscvDecoder.py @@ -24,6 +24,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from m5.objects.InstDecoder import InstDecoder +from m5.params import * class RiscvDecoder(InstDecoder): diff --git a/src/arch/riscv/RiscvISA.py b/src/arch/riscv/RiscvISA.py index bb9a05babe..f66171a95a 100644 --- a/src/arch/riscv/RiscvISA.py +++ b/src/arch/riscv/RiscvISA.py @@ -56,3 +56,5 @@ class RiscvISA(BaseISA): True, "whether to check memory access alignment" ) riscv_type = Param.RiscvType("RV64", "RV32 or RV64") + + enable_rvv = Param.Bool(True, "Enable vector extension") diff --git a/src/arch/riscv/decoder.cc b/src/arch/riscv/decoder.cc index ce362ad522..702d84fd91 100644 --- a/src/arch/riscv/decoder.cc +++ b/src/arch/riscv/decoder.cc @@ -28,6 +28,7 @@ */ #include "arch/riscv/decoder.hh" +#include "arch/riscv/isa.hh" #include "arch/riscv/types.hh" #include "base/bitfield.hh" #include "debug/Decode.hh" @@ -38,6 +39,13 @@ namespace gem5 namespace RiscvISA { +Decoder::Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst) +{ + ISA *isa = dynamic_cast(p.isa); + enableRvv = isa->getEnableRvv(); + reset(); +} + void Decoder::reset() { aligned = true; @@ -53,6 +61,10 @@ Decoder::moreBytes(const PCStateBase &pc, Addr fetchPC) // TODO: Current vsetvl instructions stall decode. Future fixes should // enable speculation, and this code will be removed. if (GEM5_UNLIKELY(!this->vConfigDone)) { + fatal_if(!enableRvv, + "Vector extension is not enabled for this CPU type\n" + "You can manually enable vector extensions by setting rvv_enabled " + "to true for each ISA object after `createThreads()`\n"); DPRINTF(Decode, "Waiting for vset*vl* to be executed\n"); instDone = false; outOfBytes = false; diff --git a/src/arch/riscv/decoder.hh b/src/arch/riscv/decoder.hh index d1d2f3cb0c..1f510e8280 100644 --- a/src/arch/riscv/decoder.hh +++ b/src/arch/riscv/decoder.hh @@ -61,6 +61,7 @@ class Decoder : public InstDecoder ExtMachInst emi; uint32_t machInst; + bool enableRvv = false; VTYPE machVtype; uint32_t machVl; @@ -72,10 +73,7 @@ class Decoder : public InstDecoder StaticInstPtr decode(ExtMachInst mach_inst, Addr addr); public: - Decoder(const RiscvDecoderParams &p) : InstDecoder(p, &machInst) - { - reset(); - } + Decoder(const RiscvDecoderParams &p); void reset() override; diff --git a/src/arch/riscv/isa.cc b/src/arch/riscv/isa.cc index 2f9d52e1b2..84205eb57a 100644 --- a/src/arch/riscv/isa.cc +++ b/src/arch/riscv/isa.cc @@ -253,7 +253,8 @@ RegClass ccRegClass(CCRegClass, CCRegClassName, 0, debug::IntRegs); } // anonymous namespace ISA::ISA(const Params &p) : - BaseISA(p), rv_type(p.riscv_type), checkAlignment(p.check_alignment) + BaseISA(p), rv_type(p.riscv_type), checkAlignment(p.check_alignment), + enableRvv(p.enable_rvv) { _regClasses.push_back(&intRegClass); _regClasses.push_back(&floatRegClass); @@ -324,8 +325,10 @@ void ISA::clear() case RV64: misa.rv64_mxl = 2; status.uxl = status.sxl = 2; - status.vs = VPUStatus::INITIAL; - misa.rvv = 1; + if (getEnableRvv()) { + status.vs = VPUStatus::INITIAL; + misa.rvv = 1; + } break; default: panic("%s: Unknown rv_type: %d", name(), (int)rv_type); diff --git a/src/arch/riscv/isa.hh b/src/arch/riscv/isa.hh index d7b0a21a1f..1be45ac7fa 100644 --- a/src/arch/riscv/isa.hh +++ b/src/arch/riscv/isa.hh @@ -75,6 +75,7 @@ class ISA : public BaseISA RiscvType rv_type; std::vector miscRegFile; bool checkAlignment; + bool enableRvv; bool hpmCounterEnabled(int counter) const; @@ -138,6 +139,8 @@ class ISA : public BaseISA RiscvType rvType() const { return rv_type; } + bool getEnableRvv() const { return enableRvv; } + void clearLoadReservation(ContextID cid) { From 98d68a7307e2f93e547acf2ff67c957a198bd0ac Mon Sep 17 00:00:00 2001 From: Jason Lowe-Power Date: Fri, 28 Jul 2023 09:46:21 -0700 Subject: [PATCH 09/10] arch-riscv: Improve style Minor style fixes in vector code Change-Id: If0de45a2dbfb5d5aaa65ed3b5d91d9bee9bcc960 Signed-off-by: Jason Lowe-Power --- src/arch/riscv/insts/vector.cc | 20 +++++++++++--------- src/arch/riscv/insts/vector.hh | 33 +++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/arch/riscv/insts/vector.cc b/src/arch/riscv/insts/vector.cc index a1ccf402c9..6ecec44dc5 100644 --- a/src/arch/riscv/insts/vector.cc +++ b/src/arch/riscv/insts/vector.cc @@ -56,18 +56,20 @@ namespace RiscvISA * **/ float -getVflmul(uint32_t vlmul_encoding) { - int vlmul = sext<3>(vlmul_encoding & 7); - float vflmul = vlmul >= 0 ? 1 << vlmul : 1.0 / (1 << -vlmul); - return vflmul; +getVflmul(uint32_t vlmul_encoding) +{ + int vlmul = sext<3>(vlmul_encoding & 7); + float vflmul = vlmul >= 0 ? 1 << vlmul : 1.0 / (1 << -vlmul); + return vflmul; } uint32_t -getVlmax(VTYPE vtype, uint32_t vlen) { - uint32_t sew = getSew(vtype.vsew); - // vlmax is defined in RVV 1.0 spec p12 chapter 3.4.2. - uint32_t vlmax = (vlen/sew) * getVflmul(vtype.vlmul); - return vlmax; +getVlmax(VTYPE vtype, uint32_t vlen) +{ + uint32_t sew = getSew(vtype.vsew); + // vlmax is defined in RVV 1.0 spec p12 chapter 3.4.2. + uint32_t vlmax = (vlen/sew) * getVflmul(vtype.vlmul); + return vlmax; } std::string diff --git a/src/arch/riscv/insts/vector.hh b/src/arch/riscv/insts/vector.hh index 5d0874a994..cae0dcac0a 100644 --- a/src/arch/riscv/insts/vector.hh +++ b/src/arch/riscv/insts/vector.hh @@ -47,7 +47,9 @@ namespace RiscvISA float getVflmul(uint32_t vlmul_encoding); -inline uint32_t getSew(uint32_t vsew) { +inline uint32_t +getSew(uint32_t vsew) +{ assert(vsew <= 3); return (8 << vsew); } @@ -124,7 +126,7 @@ class VectorMacroInst : public RiscvMacroInst class VectorMicroInst : public RiscvMicroInst { -protected: + protected: uint8_t microVl; uint8_t microIdx; uint8_t vtype; @@ -420,7 +422,7 @@ class VsStrideMacroInst : public VectorMemMacroInst class VsStrideMicroInst : public VectorMemMicroInst { protected: - uint8_t regIdx; + uint8_t regIdx; VsStrideMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _regIdx, uint8_t _microIdx, uint8_t _microVl) @@ -487,9 +489,9 @@ class VsIndexMicroInst : public VectorMemMicroInst VsIndexMicroInst(const char *mnem, ExtMachInst _machInst, OpClass __opClass, uint8_t _vs3RegIdx, uint8_t _vs3ElemIdx, uint8_t _vs2RegIdx, uint8_t _vs2ElemIdx) - : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0) - , vs3RegIdx(_vs3RegIdx), vs3ElemIdx(_vs3ElemIdx) - , vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) + : VectorMemMicroInst(mnem, _machInst, __opClass, 1, 0, 0), + vs3RegIdx(_vs3RegIdx), vs3ElemIdx(_vs3ElemIdx), + vs2RegIdx(_vs2RegIdx), vs2ElemIdx(_vs2ElemIdx) {} std::string generateDisassembly( @@ -532,7 +534,7 @@ class VMaskMergeMicroInst : public VectorArithMicroInst VMaskMergeMicroInst(ExtMachInst extMachInst, uint8_t _dstReg, uint8_t _numSrcs) : VectorArithMicroInst("vmask_mv_micro", extMachInst, - VectorIntegerArithOp, 0, 0) + VectorIntegerArithOp, 0, 0) { setRegIdxArrays( reinterpret_cast( @@ -550,8 +552,9 @@ class VMaskMergeMicroInst : public VectorArithMicroInst } } - Fault execute(ExecContext* xc, trace::InstRecord* traceData) - const override { + Fault + execute(ExecContext* xc, trace::InstRecord* traceData) const override + { vreg_t tmp_d0 = *(vreg_t *)xc->getWritableRegOperand(this, 0); auto Vd = tmp_d0.as(); constexpr uint8_t elems_per_vreg = VLENB / sizeof(ElemType); @@ -582,8 +585,10 @@ class VMaskMergeMicroInst : public VectorArithMicroInst return NoFault; } - std::string generateDisassembly( - Addr pc, const loader::SymbolTable *symtab) const override { + std::string + generateDisassembly(Addr pc, const loader::SymbolTable *symtab) + const override + { std::stringstream ss; ss << mnemonic << ' ' << registerName(destRegIdx(0)); for (uint8_t i = 0; i < this->_numSrcRegs; i++) { @@ -605,8 +610,8 @@ class VxsatMicroInst : public VectorArithMicroInst { vxsat = Vxsat; } - Fault execute(ExecContext* xc, trace::InstRecord* traceData) - const override + Fault + execute(ExecContext* xc, trace::InstRecord* traceData) const override { xc->setMiscReg(MISCREG_VXSAT,*vxsat); auto vcsr = xc->readMiscReg(MISCREG_VCSR); @@ -614,7 +619,7 @@ class VxsatMicroInst : public VectorArithMicroInst return NoFault; } std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) - const override + const override { std::stringstream ss; ss << mnemonic << ' ' << "VXSAT" << ", " << (*vxsat ? "0x1" : "0x0"); From 884d62b33af866664960e399ddc09b654753b794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Armejach?= Date: Wed, 2 Aug 2023 14:05:21 +0200 Subject: [PATCH 10/10] arch-riscv: Make vset*vl* instructions serialize Current implementation of vset*vl* instructions serialize pipeline and are non-speculative. Change-Id: Ibf93b60133fb3340690b126db12827e36e2c202d --- src/arch/riscv/isa/decoder.isa | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 2b46752ffe..d34adfaa02 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -4344,7 +4344,7 @@ decode QUADRANT default Unknown::unknown() { uint64_t requested_vtype = zimm11; Rd_ud = 0; - }}, VectorConfigOp, IsDirectControl, IsCondControl); + }}, VectorConfigOp, IsSerializeAfter, IsNonSpeculative); 0x1: decode BIT30 { 0x0: vsetvl({{ uint64_t rd_bits = RD; @@ -4353,7 +4353,8 @@ decode QUADRANT default Unknown::unknown() { uint64_t requested_vtype = Rs2_ud; Rd_ud = 0; - }}, VectorConfigOp, IsDirectControl, IsCondControl); + }}, VectorConfigOp, IsSerializeAfter, + IsNonSpeculative); 0x1: vsetivli({{ uint64_t rd_bits = RD; uint64_t rs1_bits = -1; @@ -4361,7 +4362,8 @@ decode QUADRANT default Unknown::unknown() { uint64_t requested_vtype = zimm10; Rd_ud = 0; - }}, VectorConfigOp, IsDirectControl, IsCondControl); + }}, VectorConfigOp, IsSerializeAfter, + IsNonSpeculative); } } }