This commit adds support for vector unit-stride segment store operations
(vssegXeXX) for RISC-V.

The implementation is based on two types of microops:
- VsSegIntrlv microops, which interleave the source registers into structs.
- VsSeg microops, which store the data to memory as contiguous structs of
  several fields.

Change-Id: Id80dd4e781743a60eb76c18b6a28061f8e9f723d
Gem5 issue: https://github.com/gem5/gem5/issues/382
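For reference, the following is a minimal scalar sketch of the architectural
behaviour of a unit-stride segment store (masking and the microop split are
omitted, and the names are illustrative, not the gem5 implementation added by
this commit): element i of field f is taken from source register group
vs3 + f and written to slot i * nf + f, so memory ends up holding an array of
nf-field structs.

    #include <cstdint>
    #include <cstring>

    // Scalar reference model of vsseg<nf>e<eew>_v. "vregs[f]" stands for
    // the elements of register group vs3 + f; "base" is the scalar address.
    template <typename T>
    void
    vssegStoreRef(uint8_t *base, const T *const vregs[], unsigned nf,
                  unsigned vl)
    {
        for (unsigned i = 0; i < vl; i++)
            for (unsigned f = 0; f < nf; f++)
                std::memcpy(base + (i * nf + f) * sizeof(T),
                            &vregs[f][i], sizeof(T));
    }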
// -*- mode:c++ -*-

// Copyright (c) 2015 RISC-V Foundation
// Copyright (c) 2017 The University of Virginia
// Copyright (c) 2020 Barkhausen Institut
// Copyright (c) 2021 StreamComputing Corp
// Copyright (c) 2022 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

////////////////////////////////////////////////////////////////////
//
// The RISC-V ISA decoder
//

// In theory, all registers should be sign extended when not operating on the
// full MXLEN width, but doing so would produce out-of-range memory addresses,
// since addresses are always treated as uint64. So PC-related registers and
// memory addresses are zero extended, and everything else is sign extended.
decode QUADRANT default Unknown::unknown() {
|
|
0x0: decode COPCODE {
|
|
0x0: CIAddi4spnOp::c_addi4spn({{
|
|
imm = CIMM8<1:1> << 2 |
|
|
CIMM8<0:0> << 3 |
|
|
CIMM8<7:6> << 4 |
|
|
CIMM8<5:2> << 6;
|
|
}}, {{
|
|
if (imm == 0)
|
|
return std::make_shared<IllegalInstFault>("immediate = 0",
|
|
machInst);
|
|
Rp2 = rvSext(sp + imm);
|
|
}}, uint64_t);
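// The immediate above is the standard C.ADDI4SPN (CIW-format) bit shuffle of
// the 8-bit CIMM8 field into a zero-extended, 4-byte-aligned offset. A
// standalone sketch of the same shuffle, assuming cimm8 holds instruction
// bits 12..5 with cimm8 bit 0 = instruction bit 5 (illustrative helper, not
// part of the decoder):
//
//     uint64_t ciAddi4spnImm(uint64_t cimm8)
//     {
//         return ((cimm8 >> 1) & 0x1) << 2 |   // CIMM8<1>   -> imm[2]
//                ((cimm8 >> 0) & 0x1) << 3 |   // CIMM8<0>   -> imm[3]
//                ((cimm8 >> 6) & 0x3) << 4 |   // CIMM8<7:6> -> imm[5:4]
//                ((cimm8 >> 2) & 0xf) << 6;    // CIMM8<5:2> -> imm[9:6]
//     }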
|
|
format CompressedLoad {
|
|
0x1: c_fld({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
// Mutating any floating point register changes the FS bit
|
|
// of the STATUS CSR.
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
Fp2_bits = Mem;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x2: c_lw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
Rp2_sd = Mem_sw;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x0: c_flw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd = freg(f32(Mem_uw));
|
|
Fp2_bits = fd.v;
|
|
}}, {{
|
|
EA = (uint32_t)(Rp1_uw + offset);
|
|
}});
|
|
0x1: c_ld({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
Rp2_sd = Mem_sd;
|
|
}}, {{
|
|
EA = Rp1 + offset;
|
|
}});
|
|
}
|
|
}
|
|
0x4: decode CFUNCT6LOW3 {
|
|
format CompressedLoad {
|
|
0x0: c_lbu({{
|
|
offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
|
|
}}, {{
|
|
Rp2 = Mem_ub;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x1: decode CFUNCT1BIT6 {
|
|
0x0: c_lhu({{
|
|
offset = CIMM2<0:0> << 1;
|
|
}}, {{
|
|
Rp2 = Mem_uh;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x1: c_lh({{
|
|
offset = CIMM2<0:0> << 1;
|
|
}}, {{
|
|
Rp2_sd = Mem_sh;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
}
|
|
}
|
|
format CompressedStore {
|
|
0x2: c_sb({{
|
|
offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
|
|
}}, {{
|
|
Mem_ub = Rp2_ub;
|
|
}}, ea_code={{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x3: c_sh({{
|
|
offset = (CIMM2<0:0> << 1);
|
|
}}, {{
|
|
Mem_uh = Rp2_uh;
|
|
}}, ea_code={{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
}
|
|
}
|
|
format CompressedStore {
|
|
0x5: c_fsd({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem = Fp2_bits;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x6: c_sw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
Mem_uw = Rp2_uw;
|
|
}}, ea_code={{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x7: decode RVTYPE {
|
|
0x0: c_fsw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem_uw = unboxF32(boxF32(Fs2_bits));
|
|
}}, {{
|
|
EA = (uint32_t)(Rp1_uw + offset);
|
|
}});
|
|
0x1: c_sd({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
Mem_ud = Rp2_ud;
|
|
}}, {{
|
|
EA = Rp1 + offset;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
0x1: decode COPCODE {
|
|
0x0: CIOp::c_addi({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5));
|
|
}}, {{
|
|
if ((RC1 == 0) != (imm == 0)) {
|
|
if (RC1 == 0) {
|
|
// imm != 0 is HINT
|
|
} else {
|
|
// imm == 0 is HINT
|
|
}
|
|
}
|
|
Rc1_sd = rvSext(Rc1_sd + imm);
|
|
}});
|
|
0x1: decode RVTYPE {
|
|
0x0: CJOp::c_jal({{
|
|
ra_sw = NPC_uw;
|
|
NPC_uw = PC_uw + imm;
|
|
}}, IsDirectControl, IsUncondControl, IsCall);
|
|
0x1: CIOp::c_addiw({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5));
|
|
}}, {{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
Rc1_sw = (int32_t)(Rc1_sw + imm);
|
|
}});
|
|
}
|
|
0x2: CIOp::c_li({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5));
|
|
}}, {{
|
|
// RC1 == 0 is HINT
|
|
Rc1_sd = imm;
|
|
}});
|
|
0x3: decode RC1 {
|
|
0x2: CIOp::c_addi16sp({{
|
|
imm = sext<10>((CIMM5<4:4> << 4) |
|
|
(CIMM5<0:0> << 5) |
|
|
(CIMM5<3:3> << 6) |
|
|
(CIMM5<2:1> << 7) |
|
|
(CIMM1 << 9));
|
|
}}, {{
|
|
if (imm == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"immediate = 0", machInst);
|
|
}
|
|
sp_sd = rvSext(sp_sd + imm);
|
|
}});
|
|
default: CIOp::c_lui({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5)) << 12;
|
|
}}, {{
|
|
// RC1 == 0 is HINT
|
|
if (imm == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"immediate = 0", machInst);
|
|
}
|
|
Rc1_sd = imm;
|
|
}});
|
|
}
|
|
0x4: decode CFUNCT2HIGH {
|
|
format CIOp {
|
|
0x0: c_srli({{
|
|
imm = CIMM5 | (CIMM1 << 5);
|
|
}}, {{
|
|
if (rvSelect((bool)CIMM1, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (imm == 0) {
|
|
// C.SRLI64, HINT for RV32/RV64
|
|
}
|
|
// The MSB can never be 1, hence no need to sign extend.
|
|
Rp1 = rvZext(Rp1) >> imm;
|
|
}}, uint64_t);
|
|
0x1: c_srai({{
|
|
imm = CIMM5 | (CIMM1 << 5);
|
|
}}, {{
|
|
if (rvSelect((bool)CIMM1, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (imm == 0) {
|
|
// C.SRAI64, HINT for RV32/RV64
|
|
}
|
|
Rp1_sd = rvSext(Rp1_sd) >> imm;
|
|
}}, uint64_t);
|
|
0x2: c_andi({{
|
|
imm = CIMM5;
|
|
if (CIMM1 > 0)
|
|
imm |= ~((uint64_t)0x1F);
|
|
}}, {{
|
|
Rp1 = rvSext(Rp1 & imm);
|
|
}}, uint64_t);
|
|
}
|
|
format CompressedROp {
|
|
0x3: decode CFUNCT1 {
|
|
0x0: decode CFUNCT2LOW {
|
|
0x0: c_sub({{
|
|
Rp1 = rvSext(Rp1 - Rp2);
|
|
}});
|
|
0x1: c_xor({{
|
|
Rp1 = rvSext(Rp1 ^ Rp2);
|
|
}});
|
|
0x2: c_or({{
|
|
Rp1 = rvSext(Rp1 | Rp2);
|
|
}});
|
|
0x3: c_and({{
|
|
Rp1 = rvSext(Rp1 & Rp2);
|
|
}});
|
|
}
|
|
0x1: decode CFUNCT2LOW {
|
|
0x0: decode RVTYPE {
|
|
0x1: c_subw({{
|
|
Rp1_sd = (int32_t)Rp1_sd - Rp2_sw;
|
|
}});
|
|
}
|
|
0x1: decode RVTYPE {
|
|
0x1: c_addw({{
|
|
Rp1_sd = (int32_t)Rp1_sd + Rp2_sw;
|
|
}});
|
|
}
|
|
0x2: c_mul({{
|
|
Rp1_sd = rvSext(Rp1_sd * Rp2_sd);
|
|
}}, IntMultOp);
|
|
0x3: decode RP2 {
|
|
0x0: c_zext_b({{
|
|
Rp1 = Rp1 & 0xFFULL;
|
|
}});
|
|
0x1: c_sext_b({{
|
|
Rp1 = sext<8>(Rp1 & 0xFFULL);
|
|
}});
|
|
0x2: c_zext_h({{
|
|
Rp1 = Rp1 & 0xFFFFULL;
|
|
}});
|
|
0x3: c_sext_h({{
|
|
Rp1 = sext<16>(Rp1 & 0xFFFFULL);
|
|
}});
|
|
0x4: decode RVTYPE {
|
|
0x1: c_zext_w({{
|
|
Rp1 = bits(Rp1, 31, 0);
|
|
}});
|
|
}
|
|
0x5: c_not({{
|
|
Rp1 = ~Rp1;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
0x5: CJOp::c_j({{
|
|
NPC = rvZext(PC + imm);
|
|
}}, IsDirectControl, IsUncondControl);
|
|
format CBOp {
|
|
0x6: c_beqz({{
|
|
if (rvSext(Rp1) == 0)
|
|
NPC = rvZext(PC + imm);
|
|
else
|
|
NPC = NPC;
|
|
}}, IsDirectControl, IsCondControl);
|
|
0x7: c_bnez({{
|
|
if (rvSext(Rp1) != 0)
|
|
NPC = rvZext(PC + imm);
|
|
else
|
|
NPC = NPC;
|
|
}}, IsDirectControl, IsCondControl);
|
|
}
|
|
}
|
|
0x2: decode COPCODE {
|
|
0x0: CIOp::c_slli({{
|
|
imm = CIMM5 | (CIMM1 << 5);
|
|
}}, {{
|
|
if (rvSelect((bool)CIMM1, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (imm == 0) {
|
|
// C.SLLI64, HINT for RV32/RV64
|
|
}
|
|
// RC1 == 0 is HINT
|
|
Rc1 = rvSext(Rc1 << imm);
|
|
}}, uint64_t);
|
|
format CompressedLoad {
|
|
0x1: c_fldsp({{
|
|
offset = CIMM5<4:3> << 3 |
|
|
CIMM1 << 5 |
|
|
CIMM5<2:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
Fc1_bits = Mem;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x2: c_lwsp({{
|
|
offset = CIMM5<4:2> << 2 |
|
|
CIMM1 << 5 |
|
|
CIMM5<1:0> << 6;
|
|
}}, {{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
Rc1_sw = Mem_sw;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x0: c_flwsp({{
|
|
offset = CIMM5<4:2> << 2 |
|
|
CIMM1 << 5 |
|
|
CIMM5<1:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f32(Mem_uw));
|
|
Fd_bits = fd.v;
|
|
}}, {{
|
|
EA = (uint32_t)(sp_uw + offset);
|
|
}});
|
|
0x1: c_ldsp({{
|
|
offset = CIMM5<4:3> << 3 |
|
|
CIMM1 << 5 |
|
|
CIMM5<2:0> << 6;
|
|
}}, {{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
Rc1_sd = Mem_sd;
|
|
}}, {{
|
|
EA = sp + offset;
|
|
}});
|
|
}
|
|
}
|
|
0x4: decode CFUNCT1 {
|
|
0x0: decode RC2 {
|
|
0x0: Jump::c_jr({{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
NPC = rvZext(Rc1);
|
|
}}, IsIndirectControl, IsUncondControl);
|
|
default: CROp::c_mv({{
|
|
// RC1 == 0 is HINT
|
|
Rc1 = rvSext(Rc2);
|
|
}});
|
|
}
|
|
0x1: decode RC2 {
|
|
0x0: decode RC1 {
|
|
0x0: SystemOp::c_ebreak({{
|
|
return std::make_shared<BreakpointFault>(
|
|
xc->pcState());
|
|
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
|
|
default: Jump::c_jalr({{
|
|
ra = rvSext(NPC);
|
|
NPC = rvZext(Rc1);
|
|
}}, IsIndirectControl, IsUncondControl, IsCall);
|
|
}
|
|
default: CompressedROp::c_add({{
|
|
// RC1 == 0 is HINT
|
|
Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
|
|
}});
|
|
}
|
|
}
|
|
format CompressedStore {
|
|
0x5: c_fsdsp({{
|
|
offset = CIMM6<5:3> << 3 |
|
|
CIMM6<2:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem_ud = Fc2_bits;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x6: c_swsp({{
|
|
offset = CIMM6<5:2> << 2 |
|
|
CIMM6<1:0> << 6;
|
|
}}, {{
|
|
Mem_uw = Rc2_uw;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x7: decode RVTYPE {
|
|
0x0: c_fswsp({{
|
|
offset = CIMM6<5:2> << 2 |
|
|
CIMM6<1:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem_uw = unboxF32(boxF32(Fs2_bits));
|
|
}}, {{
|
|
EA = (uint32_t)(sp_uw + offset);
|
|
}});
|
|
0x1: c_sdsp({{
|
|
offset = CIMM6<5:3> << 3 |
|
|
CIMM6<2:0> << 6;
|
|
}}, {{
|
|
Mem = Rc2;
|
|
}}, {{
|
|
EA = sp + offset;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
0x3: decode OPCODE5 {
|
|
0x00: decode FUNCT3 {
|
|
format Load {
|
|
0x0: lb({{
|
|
Rd_sd = Mem_sb;
|
|
}});
|
|
0x1: lh({{
|
|
Rd_sd = Mem_sh;
|
|
}});
|
|
0x2: lw({{
|
|
Rd_sd = Mem_sw;
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x1: ld({{
|
|
Rd_sd = Mem_sd;
|
|
}});
|
|
}
|
|
0x4: lbu({{
|
|
Rd = Mem_ub;
|
|
}});
|
|
0x5: lhu({{
|
|
Rd = Mem_uh;
|
|
}});
|
|
0x6: decode RVTYPE {
|
|
0x1: lwu({{
|
|
Rd = Mem_uw;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
0x01: decode FUNCT3 {
|
|
format Load {
|
|
0x1: flh({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f16(Mem_uh));
|
|
Fd_bits = fd.v;
|
|
}}, inst_flags=FloatMemReadOp);
|
|
0x2: flw({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f32(Mem_uw));
|
|
Fd_bits = fd.v;
|
|
}}, inst_flags=FloatMemReadOp);
|
|
0x3: fld({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f64(Mem));
|
|
Fd_bits = fd.v;
|
|
}}, inst_flags=FloatMemReadOp);
|
|
}
|
|
|
|
0x0: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle8_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
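// Masked unit-stride load with the register-undisturbed policy: inactive or
// tail elements (mask bit clear, or i >= microVl) keep their previous value,
// which this microop reads back through Vs2 (presumably the old destination
// group). In scalar form:
//
//     Vd[i] = (active(i) && i < microVl) ? mem[i] : Vd_old[i];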
|
|
format VlSegOp {
|
|
0x01: vlseg2e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
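// Unit-stride segment loads treat memory as an array of nf-field structs:
// vlseg3e8.v v4, (a0) on packed RGB bytes, for example, gathers all R bytes
// into v4, all G bytes into v5 and all B bytes into v6. A scalar sketch of
// the architectural behaviour (masking omitted; vreg[] is illustrative):
//
//     for (unsigned i = 0; i < vl; i++)
//         for (unsigned f = 0; f < 3; f++)
//             vreg[4 + f][i] = mem[base + i * 3 + f];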
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x0b: VlmOp::vlm_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorUnitStrideMaskLoadOp);
|
|
0x10: VleOp::vle8ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
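// Fault-only-first: only element 0 may take a trap; if a later element would
// fault, vl is instead reduced to the number of elements already loaded. The
// extra "i < this->faultIdx" guard models that by leaving the destination
// undisturbed from (what appears to be) the first faulting element onwards:
//
//     Vd[i] = (active(i) && i < microVl && i < faultIdx) ? mem[i] : Vd_old[i];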
|
|
}
|
|
0x1: VlIndexOp::vluxei8_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
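// Indexed (gather) loads: each element address is the scalar base plus the
// matching unsigned index element from vs2, so per element the behaviour is
// simply
//
//     EA     = Rs1 + (uint64_t)Vs2_index[ei];
//     Vd[ei] = *(const T *)EA;
//
// vlux (unordered) and vlox (ordered) share this element body and differ
// only in the ordering constraints on the memory accesses.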
|
|
0x2: VlStrideOp::vlse8_v({{
|
|
Vd_ub[microIdx] = Mem_vc.as<uint8_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei8_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
0x5: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle16_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
|
|
format VlSegOp {
|
|
0x01: vlseg2e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x10: VleOp::vle16ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
|
}
|
|
0x1: VlIndexOp::vluxei16_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
0x2: VlStrideOp::vlse16_v({{
|
|
Vd_uh[microIdx] = Mem_vc.as<uint16_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei16_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
0x6: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle32_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
|
|
format VlSegOp {
|
|
0x01: vlseg2e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x10: VleOp::vle32ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
|
}
|
|
0x1: VlIndexOp::vluxei32_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
0x2: VlStrideOp::vlse32_v({{
|
|
Vd_uw[microIdx] = Mem_vc.as<uint32_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei32_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
0x7: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle64_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
|
|
format VlSegOp {
|
|
0x01: vlseg2e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x10: VleOp::vle64ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
|
}
|
|
0x1: VlIndexOp::vluxei64_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
0x2: VlStrideOp::vlse64_v({{
|
|
Vd_ud[microIdx] = Mem_vc.as<uint64_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei64_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
}
|
|
|
|
0x03: decode FUNCT3 {
|
|
format FenceOp {
|
|
0x0: fence({{
|
|
}}, uint64_t, IsReadBarrier, IsWriteBarrier, No_OpClass);
|
|
0x1: fence_i({{
|
|
}}, uint64_t, IsNonSpeculative, IsSerializeAfter,
|
|
IsSquashAfter, No_OpClass);
|
|
}
|
|
|
|
0x2: decode FUNCT12 {
|
|
format CBMOp {
|
|
0x0: cbo_inval({{
|
|
Mem = 0;
|
|
}}, mem_flags=[INVALIDATE, DST_POC]);
|
|
0x1: cbo_clean({{
|
|
Mem = 0;
|
|
}}, mem_flags=[CLEAN, DST_POC]);
|
|
0x2: cbo_flush({{
|
|
Mem = 0;
|
|
}}, mem_flags=[CLEAN, INVALIDATE, DST_POC]);
|
|
0x4: cbo_zero({{
|
|
Mem = 0;
|
|
}}, mem_flags=[CACHE_BLOCK_ZERO]);
|
|
}
|
|
}
|
|
}
|
|
|
|
0x04: decode FUNCT3 {
|
|
0x1: decode FS3 {
|
|
format IOp {
|
|
0x00: slli({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
Rd = rvSext(Rs1 << imm);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x01: decode RVTYPE {
|
|
0x0: zip({{
|
|
Rd_sw = _rvk_emu_zip_32(Rs1_sw);
|
|
}}, imm_code = {{ imm = SHAMT5; }});
|
|
}
|
|
0x02: decode FS2 {
|
|
0x0: sha256sum0({{
|
|
Rd_sw = _rvk_emu_sha256sum0(Rs1_sw);
|
|
}});
|
|
0x1: sha256sum1({{
|
|
Rd_sw = _rvk_emu_sha256sum1(Rs1_sw);
|
|
}});
|
|
0x2: sha256sig0({{
|
|
Rd_sw = _rvk_emu_sha256sig0(Rs1_sw);
|
|
}});
|
|
0x3: sha256sig1({{
|
|
Rd_sw = _rvk_emu_sha256sig1(Rs1_sw);
|
|
}});
|
|
0x4: decode RVTYPE {
|
|
0x1: sha512sum0({{
|
|
Rd_sd = _rvk_emu_sha512sum0(Rs1_sd);
|
|
}});
|
|
}
|
|
0x5: decode RVTYPE {
|
|
0x1: sha512sum1({{
|
|
Rd_sd = _rvk_emu_sha512sum1(Rs1_sd);
|
|
}});
|
|
}
|
|
0x6: decode RVTYPE {
|
|
0x1: sha512sig0({{
|
|
Rd_sd = _rvk_emu_sha512sig0(Rs1_sd);
|
|
}});
|
|
}
|
|
0x7: decode RVTYPE {
|
|
0x1: sha512sig1({{
|
|
Rd_sd = _rvk_emu_sha512sig1(Rs1_sd);
|
|
}});
|
|
}
|
|
0x8: sm3p0({{
|
|
Rd_sw = _rvk_emu_sm3p0(Rs1_sw);
|
|
}});
|
|
0x9: sm3p1({{
|
|
Rd_sw = _rvk_emu_sm3p1(Rs1_sw);
|
|
}});
|
|
}
|
|
0x05: bseti({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 | (UINT64_C(1) << index));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x06: decode BIT24 {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64im({{
|
|
Rd_sd = _rvk_emu_aes64im(Rs1_sd);
|
|
}});
|
|
}
|
|
0x1: decode RVTYPE {
|
|
0x1: aes64ks1i({{
|
|
Rd_sd = _rvk_emu_aes64ks1i(Rs1_sd, imm);
|
|
}}, imm_type = int32_t, imm_code={{ imm = RNUM; }});
|
|
}
|
|
}
|
|
0x09: bclri({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 & (~(UINT64_C(1) << index)));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x0d: binvi({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 ^ (UINT64_C(1) << index));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
}
|
|
format ROp {
|
|
0x0c: decode RS2 {
|
|
0x00: clz({{
|
|
Rd = (machInst.rv_type == RV32) ? clz32(Rs1) : clz64(Rs1);
|
|
}});
|
|
0x01: ctz({{
|
|
Rd = (machInst.rv_type == RV32) ? ctz32(Rs1) : ctz64(Rs1);
|
|
}});
|
|
0x02: cpop({{
|
|
Rd = (machInst.rv_type == RV32) ? popCount(Rs1<31:0>) : popCount(Rs1);
|
|
}});
|
|
0x04: sext_b({{
|
|
Rd = sext<8>(Rs1_ub);
|
|
}});
|
|
0x05: sext_h({{
|
|
Rd = sext<16>(Rs1_uh);
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
format IOp {
|
|
0x0: addi({{
|
|
Rd_sd = rvSext(Rs1_sd + imm);
|
|
}});
|
|
0x2: slti({{
|
|
Rd = (rvSext(Rs1_sd) < imm) ? 1 : 0;
|
|
}});
|
|
0x3: sltiu({{
|
|
Rd = (rvZext(Rs1) < imm) ? 1 : 0;
|
|
}}, uint64_t, imm_code = {{ imm = rvZext(sext<12>(IMM12)); }});
|
|
0x4: xori({{
|
|
Rd = rvSext(Rs1 ^ imm);
|
|
}}, uint64_t);
|
|
0x5: decode FS3 {
|
|
0x0: srli({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
Rd = rvSext(rvZext(Rs1) >> imm);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x1: decode RVTYPE {
|
|
0x0: unzip({{
|
|
Rd_sw = _rvk_emu_unzip_32(Rs1_sw);
|
|
}}, imm_code = {{ imm = SHAMT5; }});
|
|
}
|
|
0x5: orc_b({{
|
|
uint64_t result = 0;
|
|
result |= (Rs1<7:0> ? UINT64_C(0xff) : 0x0);
|
|
result |= (Rs1<15:8> ? UINT64_C(0xff) : 0x0) << 8;
|
|
result |= (Rs1<23:16> ? UINT64_C(0xff) : 0x0) << 16;
|
|
result |= (Rs1<31:24> ? UINT64_C(0xff) : 0x0) << 24;
|
|
result |= (Rs1<39:32> ? UINT64_C(0xff) : 0x0) << 32;
|
|
result |= (Rs1<47:40> ? UINT64_C(0xff) : 0x0) << 40;
|
|
result |= (Rs1<55:48> ? UINT64_C(0xff) : 0x0) << 48;
|
|
result |= (Rs1<63:56> ? UINT64_C(0xff) : 0x0) << 56;
|
|
Rd = rvSext(result);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
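// orc.b (Zbb): every output byte is 0xff if the corresponding input byte is
// non-zero and 0x00 otherwise, e.g. orc_b(0x001234000000ab00) =
// 0x00ffff000000ff00. This is commonly used to locate a NUL byte in
// word-at-a-time string scanning.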
|
|
0x8: srai({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
Rd_sd = rvSext(Rs1_sd) >> imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x9: bexti({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = (Rs1 >> index) & 0x1;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0xc: rori({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
Rd = rvSext((rvZext(Rs1) >> imm)
|
|
| (Rs1 << ((xlen - imm) & (xlen - 1))));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
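// Rotate-right with the shift reduced mod XLEN; the (xlen - imm) & (xlen - 1)
// term keeps the left-shift count legal when imm is 0. A plain C++ sketch of
// the RV64 case:
//
//     uint64_t ror64(uint64_t x, unsigned sh)
//     {
//         sh &= 63;
//         return (x >> sh) | (x << ((64 - sh) & 63));
//     }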
|
|
0xd: decode RS2 {
|
|
0x18: ROp::rev8({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_grev_32(Rs1_sd, 0x18);
|
|
} else {
|
|
Rd_sd = _rvk_emu_grev_64(Rs1_sd, 0x38);
|
|
}
|
|
}});
|
|
0x07: ROp::brev8({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_brev8_32(Rs1_sd);
|
|
} else {
|
|
Rd_sd = _rvk_emu_brev8_64(Rs1_sd);
|
|
}
|
|
}});
|
|
}
|
|
}
|
|
0x6: ori({{
|
|
Rd = rvSext(Rs1 | imm);
|
|
}}, uint64_t);
|
|
0x7: andi({{
|
|
Rd = rvSext(Rs1 & imm);
|
|
}}, uint64_t);
|
|
}
|
|
}
|
|
|
|
0x05: UOp::auipc({{
|
|
Rd = rvSext(PC + (sext<20>(imm) << 12));
|
|
}});
|
|
|
|
0x06: decode RVTYPE {
|
|
0x1: decode FUNCT3 {
|
|
format IOp {
|
|
0x0: addiw({{
|
|
Rd_sw = (int32_t)(Rs1_sw + imm);
|
|
}}, int32_t);
|
|
0x1: decode FS3 {
|
|
0x0: slliw({{
|
|
Rd_sd = Rs1_sw << imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
0x1: slli_uw({{
|
|
Rd = ((uint64_t)(Rs1_uw)) << imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0xc: decode FS2 {
|
|
0x0: clzw({{
|
|
Rd = clz32(Rs1);
|
|
}});
|
|
0x1: ctzw({{
|
|
Rd = ctz32(Rs1);
|
|
}});
|
|
0x2: cpopw({{
|
|
Rd = popCount(Rs1<31:0>);
|
|
}});
|
|
}
|
|
}
|
|
0x5: decode FS3 {
|
|
0x0: srliw({{
|
|
Rd_sd = (int32_t)(Rs1_uw >> imm);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
0x8: sraiw({{
|
|
Rd_sd = Rs1_sw >> imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
0xc: roriw({{
|
|
Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1))));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
0x08: decode FUNCT3 {
|
|
format Store {
|
|
0x0: sb({{
|
|
Mem_ub = Rs2_ub;
|
|
}});
|
|
0x1: sh({{
|
|
Mem_uh = Rs2_uh;
|
|
}});
|
|
0x2: sw({{
|
|
Mem_uw = Rs2_uw;
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x1: sd({{
|
|
Mem_ud = Rs2_ud;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
0x09: decode FUNCT3 {
|
|
format Store {
|
|
0x1: fsh({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
Mem_uh = unboxF16(boxF16(Fs2_bits));
|
|
}}, inst_flags=FloatMemWriteOp);
|
|
0x2: fsw({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
Mem_uw = unboxF32(boxF32(Fs2_bits));
|
|
}}, inst_flags=FloatMemWriteOp);
|
|
0x3: fsd({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
Mem_ud = Fs2_bits;
|
|
}}, inst_flags=FloatMemWriteOp);
|
|
}
|
|
|
|
0x0: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
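// Per the commit message, each vsseg<nf>eXX macroop is broken into
// VsSegIntrlv microops, which gather the nf source register groups into
// field-interleaved (struct) order, followed by VsSeg microops that write
// the resulting contiguous structs with unit-stride accesses. A hypothetical
// sketch of the interleave step (names illustrative, not the actual microop
// implementation):
//
//     for (unsigned i = 0; i < microVl; i++)
//         for (unsigned f = 0; f < nf; f++)
//             interleaved[i * nf + f] = vs3_field[f][i];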
|
|
format VsWholeOp {
|
|
0x8: decode NF {
|
|
0x0: vs1r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
0x1: vs2r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
0x3: vs4r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
0x7: vs8r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
}
|
|
}
|
|
0x0b: VsmOp::vsm_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideMaskStoreOp);
|
|
}
|
|
0x1: VsIndexOp::vsuxei8_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse8_v({{
|
|
Mem_vc.as<uint8_t>()[0] = Vs3_ub[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei8_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
0x5: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
|
|
}
|
|
0x1: VsIndexOp::vsuxei16_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse16_v({{
|
|
Mem_vc.as<uint16_t>()[0] = Vs3_uh[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei16_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
0x6: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
|
|
}
|
|
0x1: VsIndexOp::vsuxei32_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse32_v({{
|
|
Mem_vc.as<uint32_t>()[0] = Vs3_uw[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei32_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
0x7: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
|
|
}
|
|
0x1: VsIndexOp::vsuxei64_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse64_v({{
|
|
Mem_vc.as<uint64_t>()[0] = Vs3_ud[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei64_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
}
|
|
|
|
0x0b: decode FUNCT3 {
|
|
0x2: decode AMOFUNCT {
|
|
0x2: LoadReserved::lr_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, mem_flags=LLSC);
|
|
0x3: StoreCond::sc_w({{
|
|
Mem_uw = Rs2_uw;
|
|
}}, {{
|
|
Rd = rvSext(result);
|
|
}}, inst_flags=IsStoreConditional, mem_flags=LLSC);
|
|
0x0: AtomicMemOp::amoadd_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int32_t> *amo_op =
|
|
new AtomicGenericOp<int32_t>(Rs2_sw,
|
|
[](int32_t* b, int32_t a){ *b += a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1: AtomicMemOp::amoswap_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x4: AtomicMemOp::amoxor_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b ^= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x8: AtomicMemOp::amoor_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b |= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0xc: AtomicMemOp::amoand_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b &= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x10: AtomicMemOp::amomin_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int32_t> *amo_op =
|
|
new AtomicGenericOp<int32_t>(Rs2_sw,
|
|
[](int32_t* b, int32_t a){ if (a < *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x14: AtomicMemOp::amomax_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int32_t> *amo_op =
|
|
new AtomicGenericOp<int32_t>(Rs2_sw,
|
|
[](int32_t* b, int32_t a){ if (a > *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x18: AtomicMemOp::amominu_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1c: AtomicMemOp::amomaxu_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
}
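// Each AMO above is modelled as a load of the old memory value into Rd plus
// an AtomicGenericOp functor that the memory system applies atomically to
// the memory location. A sketch of the add case, mirroring the lambda used
// in amoadd_w:
//
//     auto *amo_op = new AtomicGenericOp<int32_t>(
//         Rs2_sw, [](int32_t *mem, int32_t src) { *mem += src; });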
|
|
0x3: decode RVTYPE {
|
|
0x1: decode AMOFUNCT {
|
|
0x2: LoadReserved::lr_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, mem_flags=LLSC);
|
|
0x3: StoreCond::sc_d({{
|
|
Mem = Rs2;
|
|
}}, {{
|
|
Rd = result;
|
|
}}, mem_flags=LLSC, inst_flags=IsStoreConditional);
|
|
0x0: AtomicMemOp::amoadd_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int64_t> *amo_op =
|
|
new AtomicGenericOp<int64_t>(Rs2_sd,
|
|
[](int64_t* b, int64_t a){ *b += a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1: AtomicMemOp::amoswap_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x4: AtomicMemOp::amoxor_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b ^= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x8: AtomicMemOp::amoor_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b |= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0xc: AtomicMemOp::amoand_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b &= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x10: AtomicMemOp::amomin_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int64_t> *amo_op =
|
|
new AtomicGenericOp<int64_t>(Rs2_sd,
|
|
[](int64_t* b, int64_t a){ if (a < *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x14: AtomicMemOp::amomax_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int64_t> *amo_op =
|
|
new AtomicGenericOp<int64_t>(Rs2_sd,
|
|
[](int64_t* b, int64_t a){ if (a > *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x18: AtomicMemOp::amominu_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){
|
|
if (a < *b) *b = a;
|
|
});
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1c: AtomicMemOp::amomaxu_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){
|
|
if (a > *b) *b = a;
|
|
});
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
}
|
|
}
|
|
}
|
|
0x0c: decode FUNCT3 {
|
|
format ROp {
|
|
0x0: decode KFUNCT5 {
|
|
0x00: decode BS {
|
|
0x0: add({{
|
|
Rd = rvSext(Rs1_sd + Rs2_sd);
|
|
}});
|
|
0x1: sub({{
|
|
Rd = rvSext(Rs1_sd - Rs2_sd);
|
|
}});
|
|
}
|
|
0x01: decode BS {
|
|
0x0: mul({{
|
|
Rd = rvSext(Rs1_sd * Rs2_sd);
|
|
}}, IntMultOp);
|
|
}
|
|
0x08: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sum0r({{
|
|
Rd_sw = _rvk_emu_sha512sum0r(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x09: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sum1r({{
|
|
Rd_sw = _rvk_emu_sha512sum1r(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0a: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig0l({{
|
|
Rd_sw = _rvk_emu_sha512sig0l(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0b: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig1l({{
|
|
Rd_sw = _rvk_emu_sha512sig1l(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0e: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig0h({{
|
|
Rd_sw = _rvk_emu_sha512sig0h(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0f: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig1h({{
|
|
Rd_sw = _rvk_emu_sha512sig1h(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x11: decode RVTYPE {
|
|
0x0: BSOp::aes32esi({{
|
|
Rd_sw = _rvk_emu_aes32esi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x13: decode RVTYPE {
|
|
0x0: BSOp::aes32esmi({{
|
|
Rd_sw = _rvk_emu_aes32esmi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x15: decode RVTYPE {
|
|
0x0: BSOp::aes32dsi({{
|
|
Rd_sw = _rvk_emu_aes32dsi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x17: decode RVTYPE {
|
|
0x0: BSOp::aes32dsmi({{
|
|
Rd_sw = _rvk_emu_aes32dsmi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x18: BSOp::sm4ed({{
|
|
Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
0x19: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64es({{
|
|
Rd_sd = _rvk_emu_aes64es(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
0x1a: BSOp::sm4ks({{
|
|
Rd_sw = _rvk_emu_sm4ks(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
0x1b: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64esm({{
|
|
Rd_sd = _rvk_emu_aes64esm(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
0x1d: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64ds({{
|
|
Rd_sd = _rvk_emu_aes64ds(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
0x1f: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64dsm({{
|
|
Rd_sd = _rvk_emu_aes64dsm(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
0x1: decode RVTYPE {
|
|
0x1: aes64ks2({{
|
|
Rd_sd = _rvk_emu_aes64ks2(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
0x1: decode FUNCT7 {
|
|
0x0: sll({{
|
|
Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
|
|
}});
|
|
0x1: mulh({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = mulh<int32_t>(Rs1_sd, Rs2_sd);
|
|
} else {
|
|
Rd_sd = mulh<int64_t>(Rs1_sd, Rs2_sd);
|
|
}
|
|
}}, IntMultOp);
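// mulh returns the upper XLEN bits of the signed product. For RV64 the same
// result can be sketched with a 128-bit intermediate (assuming a compiler
// that provides __int128):
//
//     int64_t mulh64(int64_t a, int64_t b)
//     {
//         return (int64_t)(((__int128)a * (__int128)b) >> 64);
//     }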
|
|
0x5: clmul({{
|
|
uint64_t result = 0;
|
|
for (int i = 0; i < rvSelect(32, 64); i++) {
|
|
if ((Rs2 >> i) & 1) {
|
|
result ^= Rs1 << i;
|
|
}
|
|
}
|
|
Rd = rvSext(result);
|
|
}});
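// Carry-less multiply: partial products are combined with XOR instead of
// addition, exactly as in the bit-serial loop above. A standalone 64-bit
// sketch:
//
//     uint64_t clmul64(uint64_t a, uint64_t b)
//     {
//         uint64_t r = 0;
//         for (int i = 0; i < 64; i++)
//             if ((b >> i) & 1)
//                 r ^= a << i;
//         return r;
//     }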
|
|
0x14: bset({{
|
|
Rs2 &= rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 | (UINT64_C(1) << Rs2));
|
|
}});
|
|
0x24: bclr({{
|
|
Rs2 &= rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 & (~(UINT64_C(1) << Rs2)));
|
|
}});
|
|
0x30: rol({{
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
int shamt = Rs2 & (xlen - 1);
|
|
Rd = rvSext((Rs1 << shamt)
|
|
| (rvZext(Rs1) >> ((xlen - shamt) & (xlen - 1))));
|
|
}});
|
|
0x34: binv({{
|
|
Rs2 &= rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 ^ (UINT64_C(1) << Rs2));
|
|
}});
|
|
}
|
|
0x2: decode FUNCT7 {
|
|
0x0: slt({{
|
|
Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
|
|
}});
|
|
0x1: mulhsu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = mulhsu<int32_t>(Rs1_sd, Rs2);
|
|
} else {
|
|
Rd_sd = mulhsu<int64_t>(Rs1_sd, Rs2);
|
|
}
|
|
}}, IntMultOp);
|
|
0x5: clmulr({{
|
|
uint64_t result = 0;
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
uint64_t zextRs1 = rvZext(Rs1);
|
|
for (int i = 0; i < xlen; i++) {
|
|
if ((Rs2 >> i) & 1) {
|
|
result ^= zextRs1 >> (xlen-i-1);
|
|
}
|
|
}
|
|
Rd = rvSext(result);
|
|
}});
|
|
0x10: sh1add({{
|
|
Rd = rvSext((Rs1 << 1) + Rs2);
|
|
}});
|
|
0x14: xperm4({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_xperm4_32(Rs1_sd, Rs2_sd);
|
|
} else {
|
|
Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
|
|
}
|
|
}});
|
|
}
|
|
0x3: decode FUNCT7 {
|
|
0x0: sltu({{
|
|
Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
|
|
}});
|
|
0x1: mulhu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd = (int32_t)mulhu<uint32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd = mulhu<uint64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntMultOp);
|
|
0x5: clmulh({{
|
|
uint64_t result = 0;
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
uint64_t zextRs1 = rvZext(Rs1);
|
|
for (int i = 1; i < xlen; i++) {
|
|
if ((Rs2 >> i) & 1) {
|
|
result ^= zextRs1 >> (xlen-i);
|
|
}
|
|
}
|
|
// The MSB can never be 1, no need to sign extend.
|
|
Rd = result;
|
|
}});
|
|
}
|
|
0x4: decode FUNCT7 {
|
|
0x0: xor({{
|
|
Rd = rvSext(Rs1 ^ Rs2);
|
|
}});
|
|
0x1: div({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = div<int32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd_sd = div<int64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x4: pack({{
|
|
int xlen = rvSelect(32, 64);
|
|
Rd = rvSext(
|
|
(bits(Rs2, xlen/2-1, 0) << (xlen / 2)) | \
|
|
bits(Rs1, xlen/2-1, 0)
|
|
);
|
|
}});
|
|
0x5: min({{
|
|
Rd_sd = std::min(rvSext(Rs1_sd), rvSext(Rs2_sd));
|
|
}});
|
|
0x10: sh2add({{
|
|
Rd = rvSext((Rs1 << 2) + Rs2);
|
|
}});
|
|
0x14: xperm8({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_xperm8_32(Rs1_sd, Rs2_sd);
|
|
} else {
|
|
Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
|
|
}
|
|
}});
|
|
0x20: xnor({{
|
|
Rd = rvSext(~(Rs1 ^ Rs2));
|
|
}});
|
|
}
|
|
0x5: decode FUNCT7 {
|
|
0x0: srl({{
|
|
Rd = rvSext(rvZext(Rs1) >>
|
|
rvSelect(Rs2<4:0>, Rs2<5:0>));
|
|
}});
|
|
0x1: divu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd = (int32_t)divu<uint32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd = divu<uint64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x20: sra({{
|
|
Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
|
|
}});
|
|
0x5: minu({{
|
|
Rd = rvSext(std::min(rvZext(Rs1), rvZext(Rs2)));
|
|
}});
|
|
0x24: bext({{
|
|
Rs2 &= (rvSelect(32, 64) - 1);
|
|
// No sign extension needed because the MSB is always 0.
|
|
Rd = (Rs1 >> Rs2) & 0x1;
|
|
}});
|
|
0x30: ror({{
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
int shamt = Rs2 & (xlen - 1);
|
|
Rd = rvSext((rvZext(Rs1) >> shamt)
|
|
| (Rs1 << ((xlen - shamt) & (xlen - 1))));
|
|
}});
|
|
}
|
|
0x6: decode FUNCT7 {
|
|
0x0: or({{
|
|
Rd = rvSext(Rs1 | Rs2);
|
|
}});
|
|
0x1: rem({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = rem<int32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd_sd = rem<int64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x5: max({{
|
|
Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
|
|
}});
|
|
0x10: sh3add({{
|
|
Rd = rvSext((Rs1 << 3) + Rs2);
|
|
}});
|
|
0x20: orn({{
|
|
Rd = rvSext(Rs1 | (~Rs2));
|
|
}});
|
|
}
|
|
0x7: decode FUNCT7 {
|
|
0x0: and({{
|
|
Rd = rvSext(Rs1 & Rs2);
|
|
}});
|
|
0x1: remu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd = (int32_t)remu<uint32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd = remu<uint64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x4: packh({{
|
|
// No sign extension needed, as the MSB is always 0.
|
|
Rd = (Rs2_ub << 8) | Rs1_ub;
|
|
}});
|
|
0x5: maxu({{
|
|
Rd = rvSext(std::max(rvZext(Rs1), rvZext(Rs2)));
|
|
}});
|
|
0x20: andn({{
|
|
Rd = rvSext(Rs1 & (~Rs2));
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
0x0d: UOp::lui({{
|
|
Rd = (sext<20>(imm) << 12);
|
|
}});
|
|
|
|
        0x0e: decode RVTYPE {
            0x1: decode FUNCT3 {
                format ROp {
                    0x0: decode FUNCT7 {
                        0x0: addw({{
                            Rd_sd = Rs1_sw + Rs2_sw;
                        }});
                        0x1: mulw({{
                            Rd_sd = (int32_t)(Rs1_sw * Rs2_sw);
                        }}, IntMultOp);
                        0x4: add_uw({{
                            Rd = Rs1_uw + Rs2;
                        }});
                        0x20: subw({{
                            Rd_sd = Rs1_sw - Rs2_sw;
                        }});
                    }
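                    // rolw/rorw (Zbb) rotate the low 32 bits of rs1 by
                    // rs2 mod 32 and sign-extend the 32-bit result; the
                    // sh[123]add.uw forms (Zba) add the zero-extended low
                    // word of rs1, scaled, to rs2 as a full 64-bit result.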
                    0x1: decode FUNCT7 {
                        0x0: sllw({{
                            Rd_sd = Rs1_sw << Rs2<4:0>;
                        }});
                        0x30: rolw({{
                            int shamt = Rs2 & (32 - 1);
                            Rd = (int32_t)((Rs1_uw << shamt) |
                                    (Rs1_uw >> ((32 - shamt) & (32 - 1))));
                        }});
                    }
                    0x2: decode FUNCT7 {
                        0x10: sh1add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 1) + Rs2;
                        }});
                    }
                    0x4: decode FUNCT7 {
                        0x1: divw({{
                            Rd_sd = div<int32_t>(Rs1, Rs2);
                        }}, IntDivOp);
                        0x4: packw({{
                            Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
                        }});
                        0x10: sh2add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 2) + Rs2;
                        }});
                    }
                    0x5: decode FUNCT7 {
                        0x0: srlw({{
                            Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
                        }});
                        0x1: divuw({{
                            Rd = sext<32>(divu<uint32_t>(Rs1, Rs2));
                        }}, IntDivOp);
                        0x20: sraw({{
                            Rd_sd = Rs1_sw >> Rs2<4:0>;
                        }});
                        0x30: rorw({{
                            int shamt = Rs2 & (32 - 1);
                            Rd = (int32_t)((Rs1_uw >> shamt) |
                                    (Rs1_uw << ((32 - shamt) & (32 - 1))));
                        }});
                    }
                    0x6: decode FUNCT7 {
                        0x1: remw({{
                            Rd_sd = rem<int32_t>(Rs1, Rs2);
                        }}, IntDivOp);
                        0x10: sh3add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
                        }});
                    }
                    0x7: remuw({{
                        Rd = sext<32>(remu<uint32_t>(Rs1, Rs2));
                    }}, IntDivOp);
                }
            }
        }

format FPROp {
|
|
0x10: decode FUNCT2 {
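                // The FP instructions below use the softfloat element types
                // (f16/f32/f64): freg() converts between the raw, NaN-boxed
                // bits held in Fd_bits/Fs*_bits and the softfloat value
                // types, and RM_REQUIRED validates the rounding-mode field
                // (the dynamic mode comes from the frm CSR).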
|
|
0x0: fmadd_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fmadd_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fmadd_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
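            // fmsub/fnmsub/fnmadd below reuse f*_mulAdd and negate the
            // relevant operand(s) by XORing the sign bit: mask(31, 31) for
            // single, mask(63, 63) for double, mask(15, 15) for half
            // precision.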
|
|
0x11: decode FUNCT2 {
|
|
0x0: fmsub_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(f32(freg(Fs3_bits)).v ^
|
|
mask(31, 31))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fmsub_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(f64(freg(Fs3_bits)).v ^
|
|
mask(63, 63))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fmsub_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(f16(freg(Fs3_bits)).v ^
|
|
mask(15, 15))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
|
|
0x12: decode FUNCT2 {
|
|
0x0: fnmsub_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(f32(freg(Fs1_bits)).v ^
|
|
mask(31, 31)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fnmsub_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(f64(freg(Fs1_bits)).v ^
|
|
mask(63, 63)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fnmsub_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(f16(freg(Fs1_bits)).v ^
|
|
mask(15, 15)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
|
|
0x13: decode FUNCT2 {
|
|
0x0: fnmadd_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(f32(freg(Fs1_bits)).v ^
|
|
mask(31, 31)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(f32(freg(Fs3_bits)).v ^
|
|
mask(31, 31))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fnmadd_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(f64(freg(Fs1_bits)).v ^
|
|
mask(63, 63)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(f64(freg(Fs3_bits)).v ^
|
|
mask(63, 63))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fnmadd_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(f16(freg(Fs1_bits)).v ^
|
|
mask(15, 15)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(f16(freg(Fs3_bits)).v ^
|
|
mask(15, 15))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
|
|
0x14: decode FUNCT7 {
|
|
0x0: fadd_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_add(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x1: fadd_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_add(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x2: fadd_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_add(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x4: fsub_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_sub(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x5: fsub_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_sub(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x6: fsub_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_sub(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x8: fmul_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mul(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultOp);
|
|
0x9: fmul_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mul(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultOp);
|
|
0xa: fmul_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mul(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultOp);
|
|
0xc: fdiv_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_div(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatDivOp);
|
|
0xd: fdiv_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_div(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatDivOp);
|
|
0xe: fdiv_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_div(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatDivOp);
|
|
0x10: decode ROUND_MODE {
|
|
0x0: fsgnj_s({{
|
|
auto sign = bits(unboxF32(Fs2_bits), 31);
|
|
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x1: fsgnjn_s({{
|
|
auto sign = ~bits(unboxF32(Fs2_bits), 31);
|
|
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x2: fsgnjx_s({{
|
|
auto sign = bits(
|
|
unboxF32(Fs1_bits) ^ unboxF32(Fs2_bits), 31);
|
|
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
}
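                // Sign-injection: the result is rs1's magnitude with the
                // sign taken from rs2 (fsgnj), its inverse (fsgnjn), or the
                // XOR of both signs (fsgnjx); fsgnj rd,rs,rs is the
                // canonical fmv. unboxF32/boxF32 handle the NaN-boxing of
                // 32-bit values inside the 64-bit FP registers.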
|
|
0x11: decode ROUND_MODE {
|
|
0x0: fsgnj_d({{
|
|
Fd_bits = insertBits(Fs2_bits, 62, 0, Fs1_bits);
|
|
}}, FloatMiscOp);
|
|
0x1: fsgnjn_d({{
|
|
Fd_bits = insertBits(~Fs2_bits, 62, 0, Fs1_bits);
|
|
}}, FloatMiscOp);
|
|
0x2: fsgnjx_d({{
|
|
Fd_bits = insertBits(
|
|
Fs1_bits ^ Fs2_bits, 62, 0, Fs1_bits);
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x12: decode ROUND_MODE {
|
|
0x0: fsgnj_h({{
|
|
auto sign = bits(unboxF16(Fs2_bits), 15);
|
|
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x1: fsgnjn_h({{
|
|
auto sign = ~bits(unboxF16(Fs2_bits), 15);
|
|
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x2: fsgnjx_h({{
|
|
auto sign = bits(
|
|
unboxF16(Fs1_bits) ^ unboxF16(Fs2_bits), 15);
|
|
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x14: decode ROUND_MODE {
|
|
0x0: fmin_s({{
|
|
float32_t fs1 = f32(freg(Fs1_bits));
|
|
float32_t fs2 = f32(freg(Fs2_bits));
|
|
float32_t fd;
|
|
bool less = f32_lt_quiet(fs1, fs2) ||
|
|
(f32_eq(fs1, fs2) && bits(fs1.v, 31));
|
|
|
|
fd = less || isNaNF32UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
|
|
fd = f32(defaultNaNF32UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
0x1: fmax_s({{
|
|
float32_t fs1 = f32(freg(Fs1_bits));
|
|
float32_t fs2 = f32(freg(Fs2_bits));
|
|
float32_t fd;
|
|
bool greater = f32_lt_quiet(fs2, fs1) ||
|
|
(f32_eq(fs2, fs1) && bits(fs2.v, 31));
|
|
|
|
                        fd = greater || isNaNF32UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
|
|
fd = f32(defaultNaNF32UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
}
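                // fmin/fmax: if exactly one operand is NaN the other operand
                // is returned, if both are NaN the canonical NaN is
                // returned, and -0.0 is treated as less than +0.0 (hence the
                // explicit sign-bit check).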
|
|
0x15: decode ROUND_MODE {
|
|
0x0: fmin_d({{
|
|
float64_t fs1 = f64(freg(Fs1_bits));
|
|
float64_t fs2 = f64(freg(Fs2_bits));
|
|
float64_t fd;
|
|
bool less = f64_lt_quiet(fs1, fs2) ||
|
|
(f64_eq(fs1, fs2) && bits(fs1.v, 63));
|
|
|
|
fd = less || isNaNF64UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
|
|
fd = f64(defaultNaNF64UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
0x1: fmax_d({{
|
|
float64_t fs1 = f64(freg(Fs1_bits));
|
|
float64_t fs2 = f64(freg(Fs2_bits));
|
|
float64_t fd;
|
|
bool greater = f64_lt_quiet(fs2, fs1) ||
|
|
(f64_eq(fs2, fs1) && bits(fs2.v, 63));
|
|
|
|
fd = greater || isNaNF64UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
|
|
fd = f64(defaultNaNF64UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x16: decode ROUND_MODE {
|
|
0x0: fmin_h({{
|
|
float16_t fs1 = f16(freg(Fs1_bits));
|
|
float16_t fs2 = f16(freg(Fs2_bits));
|
|
float16_t fd;
|
|
bool less = f16_lt_quiet(fs1, fs2) ||
|
|
(f16_eq(fs1, fs2) && bits(fs1.v, 15));
|
|
|
|
fd = less || isNaNF16UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
|
|
fd = f16(defaultNaNF16UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
0x1: fmax_h({{
|
|
float16_t fs1 = f16(freg(Fs1_bits));
|
|
float16_t fs2 = f16(freg(Fs2_bits));
|
|
float16_t fd;
|
|
bool greater = f16_lt_quiet(fs2, fs1) ||
|
|
(f16_eq(fs2, fs1) && bits(fs2.v, 15));
|
|
|
|
fd = greater || isNaNF16UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
|
|
fd = f16(defaultNaNF16UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x20: decode CONV_SGN {
|
|
0x1: fcvt_s_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_to_f32(f64(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: fcvt_s_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_to_f32(f16(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x21: decode CONV_SGN {
|
|
0x0: fcvt_d_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_to_f64(f32(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: fcvt_d_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_to_f64(f16(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x22: decode CONV_SGN {
|
|
0x0: fcvt_h_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_to_f16(f32(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_h_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_to_f16(f64(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
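                // For the fsqrt encodings the rs2 field is part of the
                // opcode and must be zero; a non-zero field decodes as an
                // illegal instruction below.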
|
|
0x2c: fsqrt_s({{
|
|
if (RS2 != 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x1", machInst);
|
|
}
|
|
freg_t fd;
|
|
RM_REQUIRED;
|
|
fd = freg(f32_sqrt(f32(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatSqrtOp);
|
|
0x2d: fsqrt_d({{
|
|
if (RS2 != 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x1", machInst);
|
|
}
|
|
freg_t fd;
|
|
RM_REQUIRED;
|
|
fd = freg(f64_sqrt(f64(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatSqrtOp);
|
|
0x2e: fsqrt_h({{
|
|
if (RS2 != 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x1", machInst);
|
|
}
|
|
freg_t fd;
|
|
RM_REQUIRED;
|
|
fd = freg(f16_sqrt(f16(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatSqrtOp);
|
|
0x50: decode ROUND_MODE {
|
|
0x0: fle_s({{
|
|
Rd = f32_le(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x1: flt_s({{
|
|
Rd = f32_lt(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x2: feq_s({{
|
|
Rd = f32_eq(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x51: decode ROUND_MODE {
|
|
0x0: fle_d({{
|
|
Rd = f64_le(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x1: flt_d({{
|
|
Rd = f64_lt(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x2: feq_d({{
|
|
Rd = f64_eq(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x52: decode ROUND_MODE {
|
|
0x0: fle_h({{
|
|
Rd = f16_le(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x1: flt_h({{
|
|
Rd = f16_lt(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x2: feq_h({{
|
|
Rd = f16_eq(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
}
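                // FP-to-integer conversions below: RM_REQUIRED validates the
                // rounding mode, 32-bit results (including the unsigned
                // fcvt.wu forms) are sign-extended into the destination
                // register, and the 64-bit L/LU variants are only available
                // on RV64 (hence the RVTYPE decode).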
|
|
0x60: decode CONV_SGN {
|
|
0x0: fcvt_w_s({{
|
|
RM_REQUIRED;
|
|
Rd_sd = sext<32>(f32_to_i32(f32(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_wu_s({{
|
|
RM_REQUIRED;
|
|
Rd = sext<32>(f32_to_ui32(f32(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_l_s({{
|
|
RM_REQUIRED;
|
|
Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_lu_s({{
|
|
RM_REQUIRED;
|
|
Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x61: decode CONV_SGN {
|
|
0x0: fcvt_w_d({{
|
|
RM_REQUIRED;
|
|
Rd_sd = sext<32>(f64_to_i32(f64(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_wu_d({{
|
|
RM_REQUIRED;
|
|
Rd = sext<32>(f64_to_ui32(f64(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_l_d({{
|
|
RM_REQUIRED;
|
|
Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_lu_d({{
|
|
RM_REQUIRED;
|
|
Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x62: decode CONV_SGN {
|
|
0x0: fcvt_w_h({{
|
|
RM_REQUIRED;
|
|
Rd_sd = sext<32>(f16_to_i32(f16(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_wu_h({{
|
|
RM_REQUIRED;
|
|
Rd = sext<32>(f16_to_ui32(f16(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_l_h({{
|
|
RM_REQUIRED;
|
|
Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_lu_h({{
|
|
RM_REQUIRED;
|
|
Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x68: decode CONV_SGN {
|
|
0x0: fcvt_s_w({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i32_to_f32(Rs1_sw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_s_wu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui32_to_f32(Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_s_l({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i64_to_f32(Rs1_ud));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_s_lu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui64_to_f32(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x69: decode CONV_SGN {
|
|
0x0: fcvt_d_w({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i32_to_f64(Rs1_sw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_d_wu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui32_to_f64(Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_d_l({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i64_to_f64(Rs1_sd));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_d_lu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui64_to_f64(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x6a: decode CONV_SGN {
|
|
0x0: fcvt_h_w({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i32_to_f16((int32_t)Rs1_sw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_h_wu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui32_to_f16((uint32_t)Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_h_l({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i64_to_f16(Rs1_ud));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_h_lu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui64_to_f16(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x70: decode ROUND_MODE {
|
|
0x0: fmv_x_w({{
|
|
uint64_t result = (uint32_t)Fs1_bits;
|
|
if ((result&0x80000000) != 0) {
|
|
result |= (0xFFFFFFFFULL << 32);
|
|
}
|
|
Rd = result;
|
|
}}, FloatCvtOp);
|
|
0x1: fclass_s({{
|
|
Rd = rvSext(f32_classify(f32(freg(Fs1_bits))));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x71: decode ROUND_MODE {
|
|
0x0: decode RVTYPE {
|
|
0x1: fmv_x_d({{
|
|
Rd = freg(Fs1_bits).v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x1: fclass_d({{
|
|
Rd = f64_classify(f64(freg(Fs1_bits)));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x72: decode ROUND_MODE {
|
|
0x0: fmv_x_h({{
|
|
uint64_t result = (uint16_t)Fs1_bits;
|
|
if ((result&0x8000) != 0) {
|
|
result |= (0xFFFFFFFFFFFFULL << 16);
|
|
}
|
|
Rd = result;
|
|
}}, FloatCvtOp);
|
|
0x1: fclass_h({{
|
|
Rd = f16_classify(f16(freg(Fs1_bits)));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x78: fmv_w_x({{
|
|
freg_t fd;
|
|
fd = freg(f32(Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x79: decode RVTYPE {
|
|
0x1: fmv_d_x({{
|
|
freg_t fd;
|
|
fd = freg(f64(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x7a: fmv_h_x({{
|
|
freg_t fd;
|
|
fd = freg(f16(Rs1_uh));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
|
|
0x15: decode FUNCT3 {
|
|
// OPIVV
|
|
0x0: decode VFUNCT6 {
|
|
format VectorIntFormat {
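                    // In these bodies (as generated by VectorIntFormat), i
                    // is the element index within the current micro-op, ei
                    // the global element index used for the v0 mask, and
                    // vu/vi the unsigned/signed element types for the
                    // current SEW.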
|
|
0x0: vadd_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2: vsub_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x4: vminu_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ?
|
|
Vs2_vu[i] : Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x5: vmin_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ?
|
|
Vs2_vi[i] : Vs1_vi[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x6: vmaxu_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ?
|
|
Vs2_vu[i] : Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x7: vmax_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ?
|
|
Vs2_vi[i] : Vs1_vi[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x9: vand_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0xa: vor_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0xb: vxor_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
0x0c: VectorGatherFormat::vrgather_vv({{
|
|
for (uint32_t i = 0; i < microVl; i++) {
|
|
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
|
|
if (this->vm || elem_mask(v0, ei)) {
|
|
const uint64_t idx = Vs1_vu[i]
|
|
- vs2_elems * vs2_idx;
|
|
auto res = (Vs1_vu[i] >= vlmax) ? 0
|
|
: (idx < vs2_elems) ? Vs2_vu[idx]
|
|
: Vs3_vu[i];
|
|
Vd_vu[i] = res;
|
|
}
|
|
}
|
|
}}, OPIVV, VectorMiscOp);
|
|
0x0e: VectorGatherFormat::vrgatherei16_vv({{
|
|
for (uint32_t i = 0; i < microVl; i++) {
|
|
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
|
|
if (this->vm || elem_mask(v0, ei)) {
|
|
const uint32_t idx = Vs1_uh[i + vs1_bias]
|
|
- vs2_elems * vs2_idx;
|
|
auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0
|
|
: (idx < vs2_elems) ? Vs2_vu[idx]
|
|
: Vs3_vu[i + vd_bias];
|
|
Vd_vu[i + vd_bias] = res;
|
|
}
|
|
}
|
|
}}, OPIVV, VectorMiscOp);
|
|
format VectorIntFormat {
|
|
0x10: decode VM {
|
|
0x0: vadc_vvm({{
|
|
Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i]
|
|
+ elem_mask(v0, ei);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
// the unmasked versions (vm=1) are reserved
|
|
}
|
|
0x12: decode VM {
|
|
0x0: vsbc_vvm({{
|
|
Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i]
|
|
- elem_mask(v0, ei);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
// the unmasked versions (vm=1) are reserved
|
|
}
|
|
0x17: decode VM {
|
|
0x0: vmerge_vvm({{
|
|
Vd_vu[i] = elem_mask(v0, ei)
|
|
? Vs1_vu[i]
|
|
: Vs2_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1: decode VS2 {
|
|
0x0: vmv_v_v({{
|
|
Vd_vu[i] = Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
}
|
|
format VectorIntVxsatFormat{
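                    // Saturating fixed-point ops: the sat_* helpers clamp
                    // the result and report saturation through vxsatptr,
                    // which is used to update the vxsat CSR.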
|
|
0x20: vsaddu_vv({{
|
|
Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x21: vsadd_vv({{
|
|
Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x22: vssubu_vv({{
|
|
Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x23: vssub_vv({{
|
|
Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x27: vsmul_vv({{
|
|
vi max = std::numeric_limits<vi>::max();
|
|
vi min = std::numeric_limits<vi>::min();
|
|
bool overflow = Vs1_vi[i] == Vs2_vi[i] &&
|
|
Vs1_vi[i] == min;
|
|
__int128_t result = (__int128_t)Vs1_vi[i] *
|
|
(__int128_t)Vs2_vi[i];
|
|
result = int_rounding<__int128_t>(
|
|
result, 0 /* TODO */, sew - 1);
|
|
result = result >> (sew - 1);
|
|
if (overflow) {
|
|
result = max;
|
|
*vxsatptr = true;
|
|
}
|
|
|
|
Vd_vi[i] = (vi)result;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x25: vsll_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x28: vsrl_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x29: vsra_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2a: vssrl_vv({{
|
|
int sh = Vs1_vu[i] & (sew - 1);
|
|
__uint128_t val = Vs2_vu[i];
|
|
|
|
val = int_rounding<__uint128_t>(val,
|
|
xc->readMiscReg(MISCREG_VXRM), sh);
|
|
Vd_vu[i] = val >> sh;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2b: vssra_vv({{
|
|
int sh = Vs1_vi[i] & (sew - 1);
|
|
__int128_t val = Vs2_vi[i];
|
|
|
|
val = int_rounding<__int128_t>(val,
|
|
xc->readMiscReg(MISCREG_VXRM), sh);
|
|
Vd_vi[i] = val >> sh;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorReduceIntWideningFormat {
|
|
0x30: vwredsumu_vs({{
|
|
Vd_vwu[0] = reduce_loop(std::plus<vwu>(),
|
|
Vs1_vwu, Vs2_vu);
|
|
}}, OPIVV, VectorIntegerReduceOp);
|
|
0x31: vwredsum_vs({{
|
|
Vd_vwu[0] = reduce_loop(std::plus<vwi>(),
|
|
Vs1_vwi, Vs2_vi);
|
|
}}, OPIVV, VectorIntegerReduceOp);
|
|
}
|
|
format VectorIntMaskFormat {
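                    // Mask-producing ops (compares, carry/borrow-out): each
                    // element yields a single bit, written into bit
                    // (i + offset) of vd via ASSIGN_VD_BIT.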
|
|
0x11: decode VM {
|
|
0x0: vmadc_vvm({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vu[i], Vs1_vu[i],
|
|
elem_mask(v0, ei)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1: vmadc_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vu[i], Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
0x13: decode VM {
|
|
0x0: vmsbc_vvm({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
borrow_out(Vs2_vi[i], Vs1_vi[i],
|
|
elem_mask(v0, ei)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1: vmsbc_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
borrow_out(Vs2_vi[i], Vs1_vi[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
0x18: vmseq_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] == Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x19: vmsne_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] != Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1a: vmsltu_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] < Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1b: vmslt_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] < Vs1_vi[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1c: vmsleu_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] <= Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1d: vmsle_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] <= Vs1_vi[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntNarrowingFormat {
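                    // Narrowing ops read 2*SEW-wide source elements (the
                    // vw* types) and write SEW-wide results; vnclip/vnclipu
                    // additionally round and saturate the shifted value.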
|
|
0x2c: vnsrl_wv({{
|
|
Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
|
|
((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2d: vnsra_wv({{
|
|
Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
|
|
((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2e: vnclipu_wv({{
|
|
vu max = std::numeric_limits<vu>::max();
|
|
uint64_t sign_mask =
|
|
std::numeric_limits<uint64_t>::max() << sew;
|
|
__uint128_t res = Vs2_vwu[i];
|
|
unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__uint128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res & sign_mask) {
|
|
res = max;
|
|
// TODO: vxsat
|
|
}
|
|
|
|
Vd_vu[i + offset] = (vu)res;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2f: vnclip_wv({{
|
|
vi max = std::numeric_limits<vi>::max();
|
|
vi min = std::numeric_limits<vi>::min();
|
|
__int128_t res = Vs2_vwi[i];
|
|
unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__int128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res < min) {
|
|
res = min;
|
|
// TODO: vxsat
|
|
} else if (res > max) {
|
|
res = max;
|
|
// TODO: vxsat
|
|
}
|
|
|
|
Vd_vi[i + offset] = (vi)res;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
// OPFVV
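            // In the vector FP bodies below, et is the softfloat element
            // type selected by SEW (16/32/64-bit) and ftype<et>() wraps the
            // raw element bits for the softfloat-based f* helpers.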
|
|
0x1: decode VFUNCT6 {
|
|
0x00: VectorFloatFormat::vfadd_vv({{
|
|
auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x01: VectorReduceFloatFormat::vfredusum_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fadd<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x02: VectorFloatFormat::vfsub_vv({{
|
|
auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x03: VectorReduceFloatFormat::vfredosum_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fadd<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x04: VectorFloatFormat::vfmin_vv({{
|
|
auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x05: VectorReduceFloatFormat::vfredmin_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fmin<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x06: VectorFloatFormat::vfmax_vv({{
|
|
auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x07: VectorReduceFloatFormat::vfredmax_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fmax<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x08: VectorFloatFormat::vfsgnj_vv({{
|
|
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
false, false).v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x09: VectorFloatFormat::vfsgnjn_vv({{
|
|
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
true, false).v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x0a: VectorFloatFormat::vfsgnjx_vv({{
|
|
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
false, true).v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
// VWFUNARY0
|
|
0x10: decode VS1 {
|
|
0x00: decode VM {
|
|
// The encodings corresponding to the masked versions
|
|
// (vm=0) of vfmv.f.s are reserved
|
|
0x1: VectorNonSplitFormat::vfmv_f_s({{
|
|
freg_t fd = freg(Vs2_vu[0]);
|
|
Fd_bits = fd.v;
|
|
}}, OPFVV, VectorMiscOp);
|
|
}
|
|
}
|
|
0x12: decode VS1 {
|
|
format VectorFloatCvtFormat {
|
|
0x00: vfcvt_xu_f_v({{
|
|
Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x01: vfcvt_x_f_v({{
|
|
Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x02: vfcvt_f_xu_v({{
|
|
auto fd = ui_to_f<et>(Vs2_vu[i]);
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x03: vfcvt_f_x_v({{
|
|
auto fd = i_to_f<et>(Vs2_vu[i]);
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x06: vfcvt_rtz_xu_f_v({{
|
|
Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x07: vfcvt_rtz_x_f_v({{
|
|
Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
}
|
|
format VectorFloatWideningCvtFormat {
|
|
0x08: vfwcvt_xu_f_v({{
|
|
Vd_vwu[i] = f_to_wui<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x09: vfwcvt_x_f_v({{
|
|
Vd_vwu[i] = f_to_wi<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0a: vfwcvt_f_xu_v({{
|
|
auto fd = ui_to_wf<vu>(Vs2_vu[i + offset]);
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0b: vfwcvt_f_x_v({{
|
|
auto fd = i_to_wf<vu>(Vs2_vu[i + offset]);
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0c: vfwcvt_f_f_v({{
|
|
auto fd = f_to_wf<et>(
|
|
ftype<et>(Vs2_vu[i + offset]));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0e: vfwcvt_rtz_xu_f_v({{
|
|
Vd_vwu[i] = f_to_wui<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0f: vfwcvt_rtz_x_f_v({{
|
|
Vd_vwu[i] = f_to_wi<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
}
|
|
format VectorFloatNarrowingCvtFormat {
|
|
0x10: vfncvt_xu_f_w({{
|
|
Vd_vu[i + offset] = f_to_nui<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x11: vfncvt_x_f_w({{
|
|
Vd_vu[i + offset] = f_to_ni<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x12: vfncvt_f_xu_w({{
|
|
auto fd = ui_to_nf<et>(Vs2_vwu[i]);
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x13: vfncvt_f_x_w({{
|
|
auto fd = i_to_nf<et>(Vs2_vwu[i]);
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x14: vfncvt_f_f_w({{
|
|
auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x15: vfncvt_rod_f_f_w({{
|
|
softfloat_roundingMode = softfloat_round_odd;
|
|
auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x16: vfncvt_rtz_xu_f_w({{
|
|
Vd_vu[i + offset] = f_to_nui<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x17: vfncvt_rtz_x_f_w({{
|
|
Vd_vu[i + offset] = f_to_ni<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
}
|
|
}
|
|
0x13: decode VS1 {
|
|
format VectorFloatCvtFormat {
|
|
0x00: vfsqrt_v({{
|
|
auto fd = fsqrt<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x04: vfrsqrt7_v({{
|
|
auto fd = frsqrte7<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x05: vfrec7_v({{
|
|
auto fd = frecip7<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x10: vfclass_v({{
|
|
auto fd = fclassify<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
}
|
|
}
|
|
|
|
format VectorFloatMaskFormat {
|
|
0x18: vmfeq_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
feq<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x19: vmfle_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
fle<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x1b: vmflt_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
flt<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x1c: vmfne_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
!feq<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
}
|
|
format VectorFloatFormat {
|
|
0x20: vfdiv_vv({{
|
|
auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x24: vfmul_vv({{
|
|
auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x28: vfmadd_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x29: vfnmadd_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
|
|
ftype<et>(Vs1_vu[i]),
|
|
fneg(ftype<et>(Vs2_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2a: vfmsub_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
fneg(ftype<et>(Vs2_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2b: vfnmsub_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
|
|
ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2c: vfmacc_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs3_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2d: vfnmacc_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
|
|
ftype<et>(Vs2_vu[i]),
|
|
fneg(ftype<et>(Vs3_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2e: vfmsac_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]),
|
|
fneg(ftype<et>(Vs3_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2f: vfnmsac_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
|
|
ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs3_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{
|
|
Vd_vwu[0] = reduce_loop(
|
|
[](const vwu& src1, const vu& src2) {
|
|
return fadd<ewt>(
|
|
ftype<ewt>(src1),
|
|
f_to_wf<et>(ftype<et>(src2))
|
|
);
|
|
}, Vs1_vwu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{
|
|
Vd_vwu[0] = reduce_loop(
|
|
[](const vwu& src1, const vu& src2) {
|
|
return fadd<ewt>(
|
|
ftype<ewt>(src1),
|
|
f_to_wf<et>(ftype<et>(src2))
|
|
);
|
|
}, Vs1_vwu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
}
|
|
format VectorFloatWideningFormat {
|
|
0x30: vfwadd_vv({{
|
|
auto fd = fadd<ewt>(
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x32: vfwsub_vv({{
|
|
auto fd = fsub<ewt>(
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x34: vfwadd_wv({{
|
|
auto fd = fadd<ewt>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x36: vfwsub_wv({{
|
|
auto fd = fsub<ewt>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x38: vfwmul_vv({{
|
|
auto fd = fmul<ewt>(
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3c: vfwmacc_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
ftype<ewt>(Vs3_vwu[i]));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3d: vfwnmacc_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fneg(ftype<ewt>(Vs3_vwu[i])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3e: vfwmsac_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fneg(ftype<ewt>(Vs3_vwu[i])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3f: vfwnmsac_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
ftype<ewt>(Vs3_vwu[i]));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
}
|
|
}
|
|
// OPMVV
|
|
0x2: decode VFUNCT6 {
|
|
format VectorReduceIntFormat {
|
|
0x0: vredsum_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::plus<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x1: vredand_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::bit_and<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x2: vredor_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::bit_or<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x3: vredxor_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::bit_xor<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x4: vredminu_vs({{
|
|
Vd_vu[0] =
|
|
reduce_loop([](const vu& src1, const vu& src2) {
|
|
return std::min<vu>(src1, src2);
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x5: vredmin_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop([](const vi& src1, const vi& src2) {
|
|
return std::min<vi>(src1, src2);
|
|
}, Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x6: vredmaxu_vs({{
|
|
Vd_vu[0] =
|
|
reduce_loop([](const vu& src1, const vu& src2) {
|
|
return std::max<vu>(src1, src2);
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x7: vredmax_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop([](const vi& src1, const vi& src2) {
|
|
return std::max<vi>(src1, src2);
|
|
}, Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x8: vaaddu_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vu[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x9: vaadd_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vi[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0xa: vasubu_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vu[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0xb: vasub_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vi[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
}
|
|
// VWXUNARY0
|
|
0x10: decode VS1 {
|
|
0x00: decode VM {
|
|
// The encodings corresponding to the masked versions
|
|
// (vm=0) of vmv.x.s are reserved.
|
|
0x1: VectorNonSplitFormat::vmv_x_s({{
|
|
Rd_ud = Vs2_vi[0];
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
0x10: Vector1Vs1RdMaskFormat::vcpop_m({{
|
|
uint64_t popcount = 0;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if(this->vm){
|
|
popcount += vs2_lsb;
|
|
}else{
|
|
bool do_mask = elem_mask(v0, i);
|
|
popcount += (vs2_lsb && do_mask);
|
|
}
|
|
}
|
|
Rd_vu = popcount;
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x11: Vector1Vs1RdMaskFormat::vfirst_m({{
|
|
int64_t pos = -1;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
if(this->vm == 0){
|
|
if(elem_mask(v0, i)==0){
|
|
continue;
|
|
}
|
|
}
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (vs2_lsb) {
|
|
pos = i;
|
|
break;
|
|
}
|
|
}
|
|
Rd_vu = pos;
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
0x12: decode VS1 {
|
|
format VectorIntExtFormat {
|
|
0x02: vzext_vf8({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vu[i] = Vs2_vextu[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x03: vsext_vf8({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vi[i] = Vs2_vext[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x04: vzext_vf4({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vu[i] = Vs2_vextu[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x05: vsext_vf4({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vi[i] = Vs2_vext[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x06: vzext_vf2({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vu[i] = Vs2_vextu[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x07: vsext_vf2({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vi[i] = Vs2_vext[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
}
|
|
}
|
|
0x14: decode VS1 {
|
|
0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{
|
|
bool has_one = false;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (this->vm || elem_mask(v0, i)){
|
|
uint64_t res = 0;
|
|
if (!has_one && !vs2_lsb) {
|
|
res = 1;
|
|
} else if (!has_one && vs2_lsb) {
|
|
has_one = true;
|
|
}
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x02: Vector1Vs1VdMaskFormat::vmsof_m({{
|
|
bool has_one = false;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (this->vm || elem_mask(v0, i)){
|
|
uint64_t res = 0;
|
|
if (!has_one && vs2_lsb) {
|
|
has_one = true;
|
|
res = 1;
|
|
}
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x03: Vector1Vs1VdMaskFormat::vmsif_m({{
|
|
bool has_one = false;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (this->vm || elem_mask(v0, i)){
|
|
uint64_t res = 0;
|
|
if (!has_one && !vs2_lsb) {
|
|
res = 1;
|
|
} else if (!has_one && vs2_lsb) {
|
|
has_one = true;
|
|
res = 1;
|
|
}
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
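                    // viota.m: each active element receives the count of
                    // set mask bits in vs2 below its index (an exclusive
                    // prefix sum); *cnt carries the running count across
                    // micro-ops.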
|
|
0x10: ViotaFormat::viota_m({{
|
|
RiscvISAInst::VecRegContainer tmp_s2;
|
|
xc->getRegOperand(this, 2,
|
|
&tmp_s2);
|
|
auto Vs2bit = tmp_s2.as<vu>();
|
|
for (uint32_t i = 0; i < this->microVl; i++) {
|
|
uint32_t ei = i +
|
|
vtype_VLMAX(vtype, vlen, true) *
|
|
this->microIdx;
|
|
bool vs2_lsb = elem_mask(Vs2bit, ei);
|
|
bool do_mask = elem_mask(v0, ei);
|
|
bool has_one = false;
|
|
                        if (this->vm || do_mask) {
|
|
if (vs2_lsb) {
|
|
has_one = true;
|
|
}
|
|
}
|
|
bool use_ori = (!this->vm) && !do_mask;
|
|
if(use_ori == false){
|
|
Vd_vu[i] = *cnt;
|
|
}
|
|
if (has_one) {
|
|
*cnt = *cnt+1;
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x11: VectorIntFormat::vid_v({{
|
|
Vd_vu[i] = ei;
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
format VectorMaskFormat {
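                    // Mask-register logical ops: one bit per element, read
                    // with elem_mask() and written with ASSIGN_VD_BIT. Note
                    // that vmandn/vmorn combine vs2 with the complement of
                    // vs1.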
|
|
0x18: vmandn_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x19: vmand_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1a: vmor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1b: vmxor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1c: vmorn_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1d: vmnand_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
!(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1e: vmnor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
!(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1f: vmxnor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
!(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)));
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x20: vdivu_vv({{
|
|
Vd_vu[i] = divu<vu>(Vs2_vu[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x21: vdiv_vv({{
|
|
Vd_vi[i] = div<vi>(Vs2_vi[i], Vs1_vi[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x22: vremu_vv({{
|
|
Vd_vu[i] = remu<vu>(Vs2_vu[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x23: vrem_vv({{
|
|
Vd_vi[i] = rem<vi>(Vs2_vi[i], Vs1_vi[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x24: vmulhu_vv({{
|
|
Vd_vu[i] = mulhu<vu>(Vs2_vu[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x25: vmul_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x26: vmulhsu_vv({{
|
|
Vd_vi[i] = mulhsu<vi>(Vs2_vi[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x27: vmulh_vv({{
|
|
Vd_vi[i] = mulh<vi>(Vs2_vi[i], Vs1_vi[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x29: vmadd_vv({{
|
|
Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x2b: vnmsub_vv({{
|
|
Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x2d: vmacc_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x2f: vnmsac_vv({{
|
|
Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntWideningFormat {
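                    // Widening ops: results are 2*SEW wide (vw* types), so
                    // each destination micro-op consumes half of a source
                    // register; offset selects which half of the narrower
                    // source feeds this micro-op.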
|
|
0x30: vwaddu_vv({{
|
|
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
|
|
+ vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x31: vwadd_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
+ vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x32: vwsubu_vv({{
|
|
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
|
|
- vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x33: vwsub_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
- vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x34: vwaddu_wv({{
|
|
Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x35: vwadd_wv({{
|
|
Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x36: vwsubu_wv({{
|
|
Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x37: vwsub_wv({{
|
|
Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x38: vwmulu_vv({{
|
|
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
|
|
* vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3a: vwmulsu_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
* vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3b: vwmul_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
* vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3c: vwmaccu_vv({{
|
|
Vd_vwu[i] = vwu(Vs1_vu[i + offset])
|
|
* vwu(Vs2_vu[i + offset])
|
|
+ Vs3_vwu[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3d: vwmacc_vv({{
|
|
Vd_vwi[i] = vwi(Vs1_vi[i + offset])
|
|
* vwi(Vs2_vi[i + offset])
|
|
+ Vs3_vwi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3f: vwmaccsu_vv({{
|
|
Vd_vwi[i] = vwi(Vs1_vi[i + offset])
|
|
* vwu(Vs2_vu[i + offset])
|
|
+ Vs3_vwi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
// OPIVI
|
|
0x3: decode VFUNCT6 {
|
|
format VectorIntFormat {
|
|
0x00: vadd_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x03: vrsub_vi({{
|
|
Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i];
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x09: vand_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x0a: vor_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x0b: vxor_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
0x0c: VectorGatherFormat::vrgather_vi({{
|
|
for (uint32_t i = 0; i < microVl; i++) {
|
|
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
|
|
if (this->vm || elem_mask(v0, ei)) {
|
|
const uint64_t idx =
|
|
(uint64_t)sext<5>(SIMM5) - vs2_elems * vs2_idx;
|
|
Vd_vu[i] = ((uint64_t)sext<5>(SIMM5) >= vlmax) ? 0
|
|
: (idx < vs2_elems) ? Vs2_vu[idx]
|
|
: Vs3_vu[i];
|
|
}
|
|
}
|
|
}}, OPIVI, VectorMiscOp);
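                // vslideup/vslidedown move vs2 elements up/down by OFFSET
                // positions (vd[i+OFFSET] = vs2[i], resp. vd[i] =
                // vs2[i+OFFSET], with zeros beyond the end of the source
                // group); the code below converts the global offset into
                // per-micro-op register and element offsets.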
|
|
0x0e: VectorSlideUpFormat::vslideup_vi({{
|
|
const int offset = (int)(uint64_t)(SIMM5);
|
|
const int microVlmax = vtype_VLMAX(machInst.vtype8,
|
|
vlen, true);
|
|
const int vregOffset = vdIdx - vs2Idx;
|
|
const int offsetInVreg = offset - vregOffset * microVlmax;
|
|
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
|
|
const int upperBound = (offsetInVreg >= 0)
|
|
? microVlmax - offsetInVreg
|
|
: microVlmax + offsetInVreg;
|
|
const int vdOffset = (offsetInVreg >= 0)
|
|
? offsetInVreg
|
|
: 0;
|
|
const int vs2Offset = (offsetInVreg >= 0)
|
|
? 0
|
|
: -offsetInVreg;
|
|
const int elemOffset = vdOffset + vdIdx * microVlmax;
|
|
for (int i = 0;
|
|
i < upperBound && i + vdOffset < microVl;
|
|
i++) {
|
|
if (this->vm || elem_mask(v0, i + elemOffset)) {
|
|
Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
|
|
}
|
|
}
|
|
}
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x0f: VectorSlideDownFormat::vslidedown_vi({{
|
|
const int offset = (int)(uint64_t)(SIMM5);
|
|
const int microVlmax = vtype_VLMAX(machInst.vtype8,
|
|
vlen, true);
|
|
const int vregOffset = vs2Idx - vdIdx;
|
|
const int offsetInVreg = offset - vregOffset * microVlmax;
|
|
const int numVs2s = vtype_regs_per_group(vtype);
|
|
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
|
|
const bool needZeroTail = numVs2s == vs2Idx + 1;
|
|
const int upperBound = (offsetInVreg >= 0)
|
|
? microVlmax - offsetInVreg
|
|
: microVlmax + offsetInVreg;
|
|
const int vdOffset = (offsetInVreg >= 0)
|
|
? 0
|
|
: -offsetInVreg;
|
|
const int vs2Offset = (offsetInVreg >= 0)
|
|
? offsetInVreg
|
|
: 0;
|
|
const int elemIdxBase = vdIdx * microVlmax;
|
|
vreg_t resVreg;
|
|
auto res = resVreg.as<vu>();
|
|
for (int i = 0;
|
|
i < upperBound && i + vdOffset < microVl;
|
|
i++) {
|
|
res[i + vdOffset] = Vs2_vu[i + vs2Offset];
|
|
}
|
|
if (needZeroTail) {
|
|
for (int i = upperBound + vdOffset;
|
|
i < microVlmax; i++) {
|
|
res[i] = 0;
|
|
}
|
|
}
|
|
for (int i = vdOffset; i < microVl ; i++) {
|
|
if (vm || elem_mask(v0, i + elemIdxBase)) {
|
|
Vd_vu[i] = res[i];
|
|
}
|
|
}
|
|
}
|
|
}}, OPIVI, VectorMiscOp);
|
|
format VectorIntFormat {
|
|
0x10: decode VM {
|
|
0x0: vadc_vim({{
|
|
Vd_vi[i] = Vs2_vi[i] +
|
|
(vi)sext<5>(SIMM5) + elem_mask(v0, ei);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
// the unmasked versions (vm=1) are reserved
|
|
}
|
|
0x17: decode VM {
|
|
0x0: vmerge_vim({{
|
|
Vd_vi[i] = elem_mask(v0, ei)
|
|
? (vi)sext<5>(SIMM5)
|
|
: Vs2_vi[i];
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1: vmv_v_i({{
|
|
Vd_vi[i] = (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
format VectorIntVxsatFormat{
|
|
0x20: vsaddu_vi({{
|
|
Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], (vu)sext<5>(SIMM5),
|
|
vxsatptr);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x21: vsadd_vi({{
|
|
Vd_vi[i] = sat_add<vi>(Vs2_vi[i], (vi)sext<5>(SIMM5),
|
|
vxsatptr);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x25: vsll_vi({{
|
|
Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x28: vsrl_vi({{
|
|
Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2a: vssrl_vi({{
|
|
int sh = SIMM5 & (vtype_SEW(vtype) - 1);
|
|
__uint128_t res = Vs2_vu[i];
|
|
|
|
res = int_rounding<__uint128_t>(
|
|
res, 0 /* TODO */, sh) >> sh;
|
|
|
|
Vd_vu[i] = res;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x29: vsra_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2b: vssra_vi({{
|
|
int sh = SIMM5 & (sew - 1);
|
|
__int128_t val = Vs2_vi[i];
|
|
|
|
val = int_rounding<__int128_t>(val,
|
|
xc->readMiscReg(MISCREG_VXRM), sh);
|
|
Vd_vi[i] = val >> sh;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
// According to Spec Section 16.6,
|
|
// vm must be 1 (unmasked) in vmv<nr>r.v instructions.
|
|
0x27: decode VM { 0x1: decode SIMM3 {
|
|
format VMvWholeFormat {
|
|
0x0: vmv1r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x1: vmv2r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x3: vmv4r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x7: vmv8r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
}
|
|
}}
|
|
format VectorIntMaskFormat {
|
|
0x11: decode VM {
|
|
0x0: vmadc_vim({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5),
|
|
elem_mask(v0, ei)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1: vmadc_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
0x18: vmseq_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] == (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x19: vmsne_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] != (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1c: vmsleu_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] <= (vu)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1d: vmsle_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] <= (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1e: vmsgtu_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] > (vu)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1f: vmsgt_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] > (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntNarrowingFormat {
|
|
0x2c: vnsrl_wi({{
|
|
Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
|
|
((vwu)SIMM5 & (sew * 2 - 1)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2d: vnsra_wi({{
|
|
Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
|
|
((vwu)SIMM5 & (sew * 2 - 1)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2e: vnclipu_wi({{
|
|
vu max = std::numeric_limits<vu>::max();
|
|
uint64_t sign_mask =
|
|
std::numeric_limits<uint64_t>::max() << sew;
|
|
__uint128_t res = Vs2_vwu[i];
|
|
unsigned shift = VS1 & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__uint128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res & sign_mask) {
|
|
// TODO: vxsat
|
|
res = max;
|
|
}
|
|
|
|
Vd_vu[i + offset] = (vu)res;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2f: vnclip_wi({{
|
|
vi max = std::numeric_limits<vi>::max();
|
|
vi min = std::numeric_limits<vi>::min();
|
|
__int128_t res = Vs2_vwi[i];
|
|
unsigned shift = VS1 & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__int128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res < min) {
|
|
res = min;
|
|
// TODO: vxsat
|
|
} else if (res > max) {
|
|
res = max;
|
|
// TODO: vxsat
|
|
}
|
|
|
|
Vd_vi[i + offset] = (vi)res;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
// OPIVX
|
|
0x4: decode VFUNCT6 {
|
|
format VectorIntFormat {
|
|
0x0: vadd_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] + Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x2: vsub_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] - Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x3: vrsub_vx({{
|
|
Vd_vu[i] = Rs1_vu - Vs2_vu[i];
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x4: vminu_vx({{
|
|
Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x5: vmin_vx({{
|
|
Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x6: vmaxu_vx({{
|
|
Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x7: vmax_vx({{
|
|
Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x9: vand_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] & Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0xa: vor_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] | Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0xb: vxor_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
}
|
|
                0x0e: VectorSlideUpFormat::vslideup_vx({{
                    const int offset = (int)Rs1_vu;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vdIdx - vs2Idx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int elemOffset = vdOffset + vdIdx * microVlmax;
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            if (this->vm || elem_mask(v0, i + elemOffset)) {
                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
                            }
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                0x0f: VectorSlideDownFormat::vslidedown_vx({{
                    const int offset = (int)Rs1_vu;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vs2Idx - vdIdx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    const int numVs2s = vtype_regs_per_group(vtype);
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const bool needZeroTail = numVs2s == vs2Idx + 1;
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int elemIdxBase = vdIdx * microVlmax;
                        vreg_t resVreg;
                        auto res = resVreg.as<vu>();
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
                        }
                        if (needZeroTail) {
                            for (int i = upperBound + vdOffset;
                                i < microVlmax; i++) {
                                res[i] = 0;
                            }
                        }
                        for (int i = vdOffset; i < microVl; i++) {
                            if (vm || elem_mask(v0, i + elemIdxBase)) {
                                Vd_vu[i] = res[i];
                            }
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                0x0c: VectorGatherFormat::vrgather_vx({{
                    for (uint32_t i = 0; i < microVl; i++) {
                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
                        if (this->vm || elem_mask(v0, ei)) {
                            const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx;
                            Vd_vu[i] = (Rs1_vu >= vlmax) ? 0
                                : (idx < vs2_elems) ? Vs2_vu[idx]
                                : Vs3_vu[i];
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                format VectorIntFormat {
                    0x10: decode VM {
                        0x0: vadc_vxm({{
                            Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei);
                        }}, OPIVX, VectorIntegerArithOp);
                        // the unmasked versions (vm=1) are reserved
                    }
                    0x12: decode VM {
                        0x0: vsbc_vxm({{
                            Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei);
                        }}, OPIVX, VectorIntegerArithOp);
                        // the unmasked versions (vm=1) are reserved
                    }
                    0x17: decode VM {
                        0x0: vmerge_vxm({{
                            Vd_vu[i] = elem_mask(v0, ei) ? Rs1_vu : Vs2_vu[i];
                        }}, OPIVX, VectorIntegerArithOp);
                        0x1: decode VS2 {
                            0x0: vmv_v_x({{
                                Vd_vu[i] = Rs1_vu;
                            }}, OPIVX, VectorIntegerArithOp);
                        }
                    }
                }
                format VectorIntVxsatFormat {
                    0x20: vsaddu_vx({{
                        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x21: vsadd_vx({{
                        Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x22: vssubu_vx({{
                        Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x23: vssub_vx({{
                        Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x27: vsmul_vx({{
                        vi max = std::numeric_limits<vi>::max();
                        vi min = std::numeric_limits<vi>::min();
                        bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min;
                        __int128_t result =
                            (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i];
                        result = int_rounding<__uint128_t>(
                            result, 0 /* TODO */, sew - 1);
                        result = result >> (sew - 1);
                        if (overflow) {
                            result = max;
                            *vxsatptr = true;
                        }

                        Vd_vi[i] = (vi)result;
                    }}, OPIVX, VectorIntegerArithOp);
                }
                format VectorIntFormat {
                    0x25: vsll_vx({{
                        Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x28: vsrl_vx({{
                        Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x29: vsra_vx({{
                        Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2a: vssrl_vx({{
                        int sh = Rs1_vu & (sew - 1);
                        __uint128_t val = Vs2_vu[i];

                        val = int_rounding<__uint128_t>(val,
                            xc->readMiscReg(MISCREG_VXRM), sh);
                        Vd_vu[i] = val >> sh;
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2b: vssra_vx({{
                        int sh = Rs1_vu & (sew - 1);
                        __int128_t val = Vs2_vi[i];

                        val = int_rounding<__int128_t>(val,
                            xc->readMiscReg(MISCREG_VXRM), sh);
                        Vd_vi[i] = val >> sh;
                    }}, OPIVX, VectorIntegerArithOp);
                }
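                // Narrowing ops read 2*SEW-wide elements from vs2 and write
                // SEW-wide results; the shift amount is taken modulo 2*SEW.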
                format VectorIntNarrowingFormat {
                    0x2c: vnsrl_wx({{
                        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
                            ((vwu)Rs1_vu & (sew * 2 - 1)));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2d: vnsra_wx({{
                        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
                            ((vwu)Rs1_vu & (sew * 2 - 1)));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2e: vnclipu_wx({{
                        vu max = std::numeric_limits<vu>::max();
                        uint64_t sign_mask =
                            std::numeric_limits<uint64_t>::max() << sew;
                        __uint128_t res = Vs2_vwu[i];
                        unsigned shift = Rs1_vu & ((sew * 2) - 1);

                        res = int_rounding<__uint128_t>(
                            res, 0 /* TODO */, shift) >> shift;

                        if (res & sign_mask) {
                            // TODO: vxsat
                            res = max;
                        }

                        Vd_vu[i + offset] = (vu)res;
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2f: vnclip_wx({{
                        vi max = std::numeric_limits<vi>::max();
                        vi min = std::numeric_limits<vi>::min();
                        __int128_t res = Vs2_vwi[i];
                        unsigned shift = Rs1_vi & ((sew * 2) - 1);

                        res = int_rounding<__int128_t>(
                            res, 0 /* TODO */, shift) >> shift;

                        if (res < min) {
                            res = min;
                            // TODO: vxsat
                        } else if (res > max) {
                            res = max;
                            // TODO: vxsat
                        }

                        Vd_vi[i + offset] = (vi)res;
                    }}, OPIVX, VectorIntegerArithOp);
                }

                format VectorIntMaskFormat {
                    0x11: decode VM {
                        0x0: vmadc_vxm({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                carry_out(Vs2_vi[i], Rs1_vi,
                                    elem_mask(v0, ei)));
                        }}, OPIVX, VectorIntegerArithOp);
                        0x1: vmadc_vx({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                carry_out(Vs2_vi[i], Rs1_vi));
                        }}, OPIVX, VectorIntegerArithOp);
                    }
                    0x13: decode VM {
                        0x0: vmsbc_vxm({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                borrow_out(Vs2_vi[i], Rs1_vi,
                                    elem_mask(v0, ei)));
                        }}, OPIVX, VectorIntegerArithOp);
                        0x1: vmsbc_vx({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                borrow_out(Vs2_vi[i], Rs1_vi));
                        }}, OPIVX, VectorIntegerArithOp);
                    }
                    0x18: vmseq_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] == Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x19: vmsne_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] != Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1a: vmsltu_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] < Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1b: vmslt_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vi[i] < Rs1_vi));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1c: vmsleu_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] <= Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1d: vmsle_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vi[i] <= Rs1_vi));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1e: vmsgtu_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] > Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1f: vmsgt_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vi[i] > Rs1_vi));
                    }}, OPIVX, VectorIntegerArithOp);
                }
            }
            // OPFVF
            0x5: decode VFUNCT6 {
                format VectorFloatFormat {
                    0x00: vfadd_vf({{
                        auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x02: vfsub_vf({{
                        auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x04: vfmin_vf({{
                        auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x06: vfmax_vf({{
                        auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x08: vfsgnj_vf({{
                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            false, false).v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x09: vfsgnjn_vf({{
                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            true, false).v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x0a: vfsgnjx_vf({{
                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            false, true).v;
                    }}, OPFVF, VectorFloatArithOp);
                }
                0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vdIdx - vs2Idx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int elemOffset = vdOffset + vdIdx * microVlmax;
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            if (this->vm || elem_mask(v0, i + elemOffset)) {
                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
                            }
                        }
                        // TODO: dirty code
                        if (vdIdx == 0 && vs2Idx == 0 &&
                            (this->vm || elem_mask(v0, 0))) {
                            tmp_d0.as<vu>()[0] = Rs1_vu;
                        }
                    }
                }}, OPFVF, VectorMiscOp);
                0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vs2Idx - vdIdx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    const int numVs2s = vtype_regs_per_group(vtype);
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const bool needZeroTail = numVs2s == vs2Idx + 1;
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int elemIdxBase = vdIdx * microVlmax;
                        vreg_t resVreg;
                        auto res = resVreg.as<vu>();
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
                        }
                        if (needZeroTail) {
                            for (int i = upperBound + vdOffset;
                                i < microVlmax; i++) {
                                res[i] = 0;
                            }
                        }
                        for (int i = vdOffset; i < microVl; i++) {
                            if (vm || elem_mask(v0, i + elemIdxBase)) {
                                Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1)
                                    ? res[i]
                                    : Rs1_vu;
                            }
                        }
                    }
                }}, OPFVF, VectorMiscOp);
                // VRFUNARY0
                0x10: decode VS2 {
                    0x00: decode VM {
                        // The encodings corresponding to the masked versions
                        // (vm=0) of vfmv.s.f are reserved
                        0x1: VectorNonSplitFormat::vfmv_s_f({{
                            if (this->vl) {
                                auto fd = ftype_freg<et>(freg(Fs1_bits));
                                Vd_vu[0] = fd.v;
                            }
                        }}, OPFVV, VectorMiscOp);
                    }
                }
                format VectorFloatFormat {
                    0x17: decode VM {
                        0x0: vfmerge_vfm({{
                            Vd_vu[i] = elem_mask(v0, ei)
                                ? ftype_freg<et>(freg(Fs1_bits)).v
                                : Vs2_vu[i];
                        }}, OPFVF, VectorFloatArithOp);
                        0x1: vfmv_v_f({{
                            auto fd = ftype_freg<et>(freg(Fs1_bits));
                            Vd_vu[i] = fd.v;
                        }}, OPFVF, VectorFloatArithOp);
                    }
                }
                format VectorFloatMaskFormat {
                    0x18: vmfeq_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            feq<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x19: vmfle_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            fle<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1b: vmflt_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            flt<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1c: vmfne_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            !feq<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1d: vmfgt_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            flt<et>(ftype_freg<et>(freg(Fs1_bits)),
                                ftype<et>(Vs2_vu[i])));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1f: vmfge_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            fle<et>(ftype_freg<et>(freg(Fs1_bits)),
                                ftype<et>(Vs2_vu[i])));
                    }}, OPFVF, VectorFloatArithOp);
                }
                format VectorFloatFormat {
                    0x20: vfdiv_vf({{
                        auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x21: vfrdiv_vf({{
                        auto fd = fdiv<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x24: vfmul_vf({{
                        auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x27: vfrsub_vf({{
                        auto fd = fsub<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x28: vfmadd_vf({{
                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x29: vfnmadd_vf({{
                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
                            ftype_freg<et>(freg(Fs1_bits)),
                            fneg(ftype<et>(Vs2_vu[i])));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2a: vfmsub_vf({{
                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            fneg(ftype<et>(Vs2_vu[i])));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2b: vfnmsub_vf({{
                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
                            ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2c: vfmacc_vf({{
                        auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]),
                            ftype<et>(Vs3_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2d: vfnmacc_vf({{
                        auto fd = fmadd<et>(
                            fneg(ftype_freg<et>(freg(Fs1_bits))),
                            ftype<et>(Vs2_vu[i]),
                            fneg(ftype<et>(Vs3_vu[i]))
                        );
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2e: vfmsac_vf({{
                        auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]),
                            fneg(ftype<et>(Vs3_vu[i])));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2f: vfnmsac_vf({{
                        auto fd = fmadd<et>(
                            fneg(ftype_freg<et>(freg(Fs1_bits))),
                            ftype<et>(Vs2_vu[i]),
                            ftype<et>(Vs3_vu[i])
                        );
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                }
                format VectorFloatWideningFormat {
                    0x30: vfwadd_vf({{
                        auto fd = fadd<ewt>(
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x32: vfwsub_vf({{
                        auto fd = fsub<ewt>(
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x34: vfwadd_wf({{
                        auto fd = fadd<ewt>(
                            ftype<ewt>(Vs2_vwu[i]),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x36: vfwsub_wf({{
                        auto fd = fsub<ewt>(
                            ftype<ewt>(Vs2_vwu[i]),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x38: vfwmul_vf({{
                        auto fd = fmul<ewt>(
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3c: vfwmacc_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(ftype_freg<et>(freg(Fs1_bits))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            ftype<ewt>(Vs3_vwu[i]));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3d: vfwnmacc_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fneg(ftype<ewt>(Vs3_vwu[i])));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3e: vfwmsac_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(ftype_freg<et>(freg(Fs1_bits))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fneg(ftype<ewt>(Vs3_vwu[i])));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3f: vfwnmsac_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            ftype<ewt>(Vs3_vwu[i]));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                }
            }
            // OPMVX
            0x6: decode VFUNCT6 {
                format VectorIntFormat {
                    0x08: vaaddu_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vu[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x09: vaadd_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vi[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                }
                0x0e: VectorSlideUpFormat::vslide1up_vx({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vdIdx - vs2Idx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int elemOffset = vdOffset + vdIdx * microVlmax;
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            if (this->vm || elem_mask(v0, i + elemOffset)) {
                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
                            }
                        }
                        // TODO: dirty code
                        if (vdIdx == 0 && vs2Idx == 0 &&
                            (this->vm || elem_mask(v0, 0))) {
                            tmp_d0.as<vu>()[0] = Rs1_vu;
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                0x0f: VectorSlideDownFormat::vslide1down_vx({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vs2Idx - vdIdx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    const int numVs2s = vtype_regs_per_group(vtype);
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const bool needZeroTail = numVs2s == vs2Idx + 1;
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int elemIdxBase = vdIdx * microVlmax;
                        vreg_t resVreg;
                        auto res = resVreg.as<vu>();
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
                        }
                        if (needZeroTail) {
                            for (int i = upperBound + vdOffset;
                                i < microVlmax; i++) {
                                res[i] = 0;
                            }
                        }
                        for (int i = vdOffset; i < microVl; i++) {
                            if (vm || elem_mask(v0, i + elemIdxBase)) {
                                Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1)
                                    ? res[i]
                                    : Rs1_vu;
                            }
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                // VRXUNARY0
                0x10: decode VS2 {
                    0x00: decode VM {
                        // The encodings corresponding to the masked versions
                        // (vm=0) of vmv.s.x are reserved.
                        0x1: VectorNonSplitFormat::vmv_s_x({{
                            if (this->vl) {
                                Vd_vu[0] = Rs1_vu;
                            }
                        }}, OPMVX, VectorMiscOp);
                    }
                }
                format VectorIntFormat {
                    0x0a: vasubu_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vu[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x0b: vasub_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vi[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x20: vdivu_vx({{
                        Vd_vu[i] = divu<vu>(Vs2_vu[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x21: vdiv_vx({{
                        Vd_vi[i] = div<vi>(Vs2_vi[i], Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x22: vremu_vx({{
                        Vd_vu[i] = remu<vu>(Vs2_vu[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x23: vrem_vx({{
                        Vd_vi[i] = rem<vi>(Vs2_vi[i], Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x24: vmulhu_vx({{
                        Vd_vu[i] = mulhu<vu>(Vs2_vu[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x25: vmul_vx({{
                        Vd_vi[i] = Vs2_vi[i] * Rs1_vi;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x26: vmulhsu_vx({{
                        Vd_vi[i] = mulhsu<vi>(Vs2_vi[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x27: vmulh_vx({{
                        Vd_vi[i] = mulh<vi>(Vs2_vi[i], Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x29: vmadd_vx({{
                        Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x2b: vnmsub_vx({{
                        Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x2d: vmacc_vx({{
                        Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x2f: vnmsac_vx({{
                        Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                }
                format VectorIntWideningFormat {
                    0x30: vwaddu_vx({{
                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x31: vwadd_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x32: vwsubu_vx({{
                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x33: vwsub_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x34: vwaddu_wx({{
                        Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x35: vwadd_wx({{
                        Vd_vwi[i] = Vs2_vwi[i] + vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x36: vwsubu_wx({{
                        Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x37: vwsub_wx({{
                        Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x38: vwmulu_vx({{
                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3a: vwmulsu_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3b: vwmul_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3c: vwmaccu_vx({{
                        Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset])
                            + Vs3_vwu[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3d: vwmacc_vx({{
                        Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset])
                            + Vs3_vwi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3e: vwmaccus_vx({{
                        Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset])
                            + Vs3_vwi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3f: vwmaccsu_vx({{
                        Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset])
                            + Vs3_vwi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                }
            }
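            // Configuration-setting instructions. The first code block
            // gathers the requested vl/vtype operands; the second commits
            // the new vl to rd and to the VL/VTYPE CSRs.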
            0x7: decode BIT31 {
                format VConfOp {
                    0x0: vsetvli({{
                        uint64_t rd_bits = RD;
                        uint64_t rs1_bits = RS1;
                        uint64_t requested_vl = Rs1_ud;
                        uint64_t requested_vtype = zimm11;
                        uint32_t vlen = VlenbBits * 8;
                        uint32_t vlmax = getVlmax(Vtype, vlen);
                        uint32_t current_vl = VL;
                    }}, {{
                        Rd_ud = new_vl;
                        VL = new_vl;
                        Vtype = new_vtype;
                    }}, VSetVlDeclare, VSetVliBranchTarget
                    , VectorConfigOp, IsUncondControl
                    , IsIndirectControl);
                    0x1: decode BIT30 {
                        0x0: vsetvl({{
                            uint64_t rd_bits = RD;
                            uint64_t rs1_bits = RS1;
                            uint64_t requested_vl = Rs1_ud;
                            uint64_t requested_vtype = Rs2_ud;
                            uint32_t vlen = VlenbBits * 8;
                            uint32_t vlmax = getVlmax(Vtype, vlen);
                            uint32_t current_vl = VL;
                        }}, {{
                            Rd_ud = new_vl;
                            VL = new_vl;
                            Vtype = new_vtype;
                        }}, VSetVlDeclare, VSetVlBranchTarget
                        , VectorConfigOp, IsUncondControl
                        , IsIndirectControl);
                        0x1: vsetivli({{
                            uint64_t rd_bits = RD;
                            uint64_t rs1_bits = -1;
                            uint64_t requested_vl = uimm;
                            uint64_t requested_vtype = zimm10;
                            uint32_t vlen = VlenbBits * 8;
                            uint32_t vlmax = getVlmax(Vtype, vlen);
                            uint32_t current_vl = VL;
                        }}, {{
                            Rd_ud = new_vl;
                            VL = new_vl;
                            Vtype = new_vtype;
                        }}, VSetiVliDeclare, VSetiVliBranchTarget
                        , VectorConfigOp, IsUncondControl
                        , IsDirectControl);
                    }
                }
            }
        }

        0x18: decode FUNCT3 {
            format BOp {
                0x0: beq({{
                    if (rvSext(Rs1) == rvSext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x1: bne({{
                    if (rvSext(Rs1) != rvSext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x4: blt({{
                    if (rvSext(Rs1_sd) < rvSext(Rs2_sd)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x5: bge({{
                    if (rvSext(Rs1_sd) >= rvSext(Rs2_sd)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x6: bltu({{
                    if (rvZext(Rs1) < rvZext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x7: bgeu({{
                    if (rvZext(Rs1) >= rvZext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
            }
        }

        0x19: decode FUNCT3 {
            0x0: Jump::jalr({{
                Rd = rvSext(NPC);
                NPC = rvZext((imm + Rs1) & (~0x1));
            }}, IsIndirectControl, IsUncondControl);
        }

        0x1b: JOp::jal({{
            Rd = rvSext(NPC);
            NPC = rvZext(PC + imm);
        }}, IsDirectControl, IsUncondControl);

        0x1c: decode FUNCT3 {
            format SystemOp {
                0x0: decode FUNCT7 {
                    0x0: decode RS2 {
                        0x0: ecall({{
                            return std::make_shared<SyscallFault>(
                                (PrivilegeMode)xc->readMiscReg(MISCREG_PRV));
                        }}, IsSerializeAfter, IsNonSpeculative, IsSyscall,
                            No_OpClass);
                        0x1: ebreak({{
                            return std::make_shared<BreakpointFault>(
                                xc->pcState());
                        }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                        0x2: uret({{
                            MISA misa = xc->readMiscReg(MISCREG_ISA);
                            if (!misa.rvn) {
                                return std::make_shared<IllegalInstFault>(
                                    "uret can't execute without RVN",
                                    machInst);
                            }
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            status.uie = status.upie;
                            status.upie = 1;
                            xc->setMiscReg(MISCREG_STATUS, status);
                            NPC = xc->readMiscReg(MISCREG_UEPC);
                        }}, IsSerializeAfter, IsNonSpeculative, IsReturn);
                    }
                    0x8: decode RS2 {
                        0x2: sret({{
                            MISA misa = xc->readMiscReg(MISCREG_ISA);
                            if (!misa.rvs) {
                                return std::make_shared<IllegalInstFault>(
                                    "sret can't execute without RVS",
                                    machInst);
                            }
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            auto pm = (PrivilegeMode)xc->readMiscReg(
                                MISCREG_PRV);
                            if (pm == PRV_U ||
                                (pm == PRV_S && status.tsr == 1)) {
                                return std::make_shared<IllegalInstFault>(
                                    "sret in user mode or TSR enabled",
                                    machInst);
                                NPC = NPC;
                            } else {
                                xc->setMiscReg(MISCREG_PRV, status.spp);
                                status.sie = status.spie;
                                status.spie = 1;
                                status.spp = PRV_U;
                                xc->setMiscReg(MISCREG_STATUS, status);
                                NPC = xc->readMiscReg(MISCREG_SEPC);
                            }
                        }}, IsSerializeAfter, IsNonSpeculative, IsReturn);
                        0x5: wfi({{
                            MISA misa = xc->readMiscReg(MISCREG_ISA);
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            auto pm = (PrivilegeMode)xc->readMiscReg(
                                MISCREG_PRV);
                            if (misa.rvs && (pm == PRV_U ||
                                (pm == PRV_S && status.tw == 1))) {
                                return std::make_shared<IllegalInstFault>(
                                    "wfi in user mode or TW enabled",
                                    machInst);
                            }
                            // Go to sleep only if there's no pending interrupt
                            // at all, including masked interrupts.
                            auto tc = xc->tcBase();
                            auto cpu = tc->getCpuPtr();
                            auto ic = dynamic_cast<RiscvISA::Interrupts*>(
                                cpu->getInterruptController(tc->threadId()));
                            panic_if(!ic, "Invalid Interrupt Controller.");
                            if (ic->readIP() == 0
                                && xc->readMiscReg(MISCREG_NMIP) == 0) {
                                tc->quiesce();
                            }
                        }}, IsNonSpeculative, IsQuiesce,
                            IsSerializeAfter, No_OpClass, IsSquashAfter);
                    }
                    0x9: sfence_vma({{
                        MISA misa = xc->readMiscReg(MISCREG_ISA);
                        if (!misa.rvs) {
                            return std::make_shared<IllegalInstFault>(
                                "sfence_vma can't execute without RVS",
                                machInst);
                        }
                        STATUS status = xc->readMiscReg(MISCREG_STATUS);
                        auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
                        if (pm == PRV_U || (pm == PRV_S && status.tvm == 1)) {
                            return std::make_shared<IllegalInstFault>(
                                "sfence in user mode or TVM enabled",
                                machInst);
                        }
                        xc->tcBase()->getMMUPtr()->demapPage(Rs1, Rs2);
                    }}, IsNonSpeculative, IsSerializeAfter, No_OpClass);
                    0x18: mret({{
                        if (xc->readMiscReg(MISCREG_PRV) != PRV_M) {
                            return std::make_shared<IllegalInstFault>(
                                "mret at lower privilege", machInst);
                            NPC = NPC;
                        } else {
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            xc->setMiscReg(MISCREG_PRV, status.mpp);
                            xc->setMiscReg(MISCREG_NMIE, 1);
                            status.mie = status.mpie;
                            status.mpie = 1;
                            status.mpp = PRV_U;
                            xc->setMiscReg(MISCREG_STATUS, status);
                            NPC = xc->readMiscReg(MISCREG_MEPC);
                        }
                    }}, IsSerializeAfter, IsNonSpeculative, IsReturn);
                }
            }
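            // CSR read/modify/write: rd receives the old (sign-extended)
            // CSR value while the CSR is updated with the new value.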
            format CSROp {
                0x1: csrrw({{
                    Rd = rvSext(data);
                    data = rvZext(Rs1);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x2: csrrs({{
                    Rd = rvSext(data);
                    data = rvZext(data | Rs1);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x3: csrrc({{
                    Rd = rvSext(data);
                    data = rvZext(data & ~Rs1);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x5: csrrwi({{
                    Rd = rvSext(data);
                    data = rvZext(uimm);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x6: csrrsi({{
                    Rd = rvSext(data);
                    data = rvZext(data | uimm);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x7: csrrci({{
                    Rd = rvSext(data);
                    data = rvZext(data & ~uimm);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
            }
        }

        0x1e: M5Op::M5Op();
    }
}