Files
gem5/src/arch/riscv/isa/decoder.isa
Ivan Fernandez 1e743fd85a arch-riscv: adding vector unit-stride segment stores to RISC-V (#913)
This commit adds support for vector unit-stride segment store operations
for RISC-V (vssegXeXX). This implementation is based on two types of
microops:
- VsSegIntrlv microops that properly interleave source registers into
structs.
- VsSeg microops that store data in memory as contiguous structs of
several fields.

Change-Id: Id80dd4e781743a60eb76c18b6a28061f8e9f723d

Gem5 issue: https://github.com/gem5/gem5/issues/382
2024-03-22 15:45:58 -07:00

5034 lines
233 KiB
C++

// -*- mode:c++ -*-
// Copyright (c) 2015 RISC-V Foundation
// Copyright (c) 2017 The University of Virginia
// Copyright (c) 2020 Barkhausen Institut
// Copyright (c) 2021 StreamComputing Corp
// Copyright (c) 2022 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
////////////////////////////////////////////////////////////////////
//
// The RISC-V ISA decoder
//
// In theory, all registers should be sign extended when not operating on the
// full MXLEN register, but that would push memory addresses out of range,
// because addresses are always treated as uint64. So we zero extend PC-related
// registers and memory addresses, and sign extend everything else.
decode QUADRANT default Unknown::unknown() {
0x0: decode COPCODE {
0x0: CIAddi4spnOp::c_addi4spn({{
    // The 8-bit CIMM8 field is scattered into immediate bits
    // [9:6], [5:4], [3] and [2]; bits [1:0] are always zero.
    imm = (CIMM8<5:2> << 6) |
          (CIMM8<7:6> << 4) |
          (CIMM8<0:0> << 3) |
          (CIMM8<1:1> << 2);
}}, {{
    // A zero immediate is rejected as an illegal instruction.
    if (imm == 0) {
        return std::make_shared<IllegalInstFault>("immediate = 0",
                                                  machInst);
    }
    // Rp2 = sp + imm, extended according to the current RV type.
    Rp2 = rvSext(sp + imm);
}}, uint64_t);
format CompressedLoad {
0x1: c_fld({{
offset = CIMM3 << 3 | CIMM2 << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>("FPU is off",
machInst);
// Mutating any floating point register changes the FS bit
// of the STATUS CSR.
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
Fp2_bits = Mem;
}}, {{
EA = rvZext(Rp1 + offset);
}});
0x2: c_lw({{
offset = CIMM2<1:1> << 2 |
CIMM3 << 3 |
CIMM2<0:0> << 6;
}}, {{
Rp2_sd = Mem_sw;
}}, {{
EA = rvZext(Rp1 + offset);
}});
0x3: decode RVTYPE {
0x0: c_flw({{
offset = CIMM2<1:1> << 2 |
CIMM3 << 3 |
CIMM2<0:0> << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>("FPU is off",
machInst);
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
freg_t fd = freg(f32(Mem_uw));
Fp2_bits = fd.v;
}}, {{
EA = (uint32_t)(Rp1_uw + offset);
}});
0x1: c_ld({{
offset = CIMM3 << 3 | CIMM2 << 6;
}}, {{
Rp2_sd = Mem_sd;
}}, {{
EA = Rp1 + offset;
}});
}
}
0x4: decode CFUNCT6LOW3 {
format CompressedLoad {
0x0: c_lbu({{
offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
}}, {{
Rp2 = Mem_ub;
}}, {{
EA = rvZext(Rp1 + offset);
}});
0x1: decode CFUNCT1BIT6 {
0x0: c_lhu({{
offset = CIMM2<0:0> << 1;
}}, {{
Rp2 = Mem_uh;
}}, {{
EA = rvZext(Rp1 + offset);
}});
0x1: c_lh({{
offset = CIMM2<0:0> << 1;
}}, {{
Rp2_sd = Mem_sh;
}}, {{
EA = rvZext(Rp1 + offset);
}});
}
}
format CompressedStore {
0x2: c_sb({{
offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
}}, {{
Mem_ub = Rp2_ub;
}}, ea_code={{
EA = rvZext(Rp1 + offset);
}});
0x3: c_sh({{
offset = (CIMM2<0:0> << 1);
}}, {{
Mem_uh = Rp2_uh;
}}, ea_code={{
EA = rvZext(Rp1 + offset);
}});
}
}
format CompressedStore {
0x5: c_fsd({{
offset = CIMM3 << 3 | CIMM2 << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>("FPU is off",
machInst);
Mem = Fp2_bits;
}}, {{
EA = rvZext(Rp1 + offset);
}});
0x6: c_sw({{
offset = CIMM2<1:1> << 2 |
CIMM3 << 3 |
CIMM2<0:0> << 6;
}}, {{
Mem_uw = Rp2_uw;
}}, ea_code={{
EA = rvZext(Rp1 + offset);
}});
0x7: decode RVTYPE {
0x0: c_fsw({{
    // Word-scaled offset assembled from the CIMM2/CIMM3 fields:
    // bit 2 <- CIMM2<1>, bits 5:3 <- CIMM3, bit 6 <- CIMM2<0>.
    offset = CIMM2<1:1> << 2 |
             CIMM3 << 3 |
             CIMM2<0:0> << 6;
}}, {{
    // Floating-point stores fault while the FPU is disabled.
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (status.fs == FPUStatus::OFF)
        return std::make_shared<IllegalInstFault>("FPU is off",
                                                  machInst);
    // The store data comes from the compressed register operand Fp2,
    // exactly as in c_fsd. Fs2 is the operand used by the 32-bit
    // fsw/fsd entries and does not name this instruction's data register.
    Mem_uw = unboxF32(boxF32(Fp2_bits));
}}, {{
    // This entry is decoded under RVTYPE 0x0; the address is
    // truncated to 32 bits.
    EA = (uint32_t)(Rp1_uw + offset);
}});
0x1: c_sd({{
offset = CIMM3 << 3 | CIMM2 << 6;
}}, {{
Mem_ud = Rp2_ud;
}}, {{
EA = Rp1 + offset;
}});
}
}
}
0x1: decode COPCODE {
0x0: CIOp::c_addi({{
    // 6-bit signed immediate: CIMM1 supplies bit 5, CIMM5 bits 4:0.
    imm = sext<6>((CIMM1 << 5) | CIMM5);
}}, {{
    // HINT encodings get no special treatment and execute as a plain add:
    //   - RC1 == 0 with imm != 0
    //   - RC1 != 0 with imm == 0
    Rc1_sd = rvSext(Rc1_sd + imm);
}});
0x1: decode RVTYPE {
0x0: CJOp::c_jal({{
ra_sw = NPC_uw;
NPC_uw = PC_uw + imm;
}}, IsDirectControl, IsUncondControl, IsCall);
0x1: CIOp::c_addiw({{
imm = sext<6>(CIMM5 | (CIMM1 << 5));
}}, {{
if (RC1 == 0) {
return std::make_shared<IllegalInstFault>(
"source reg x0", machInst);
}
Rc1_sw = (int32_t)(Rc1_sw + imm);
}});
}
0x2: CIOp::c_li({{
imm = sext<6>(CIMM5 | (CIMM1 << 5));
}}, {{
// RC1 == 0 is HINT
Rc1_sd = imm;
}});
0x3: decode RC1 {
0x2: CIOp::c_addi16sp({{
imm = sext<10>((CIMM5<4:4> << 4) |
(CIMM5<0:0> << 5) |
(CIMM5<3:3> << 6) |
(CIMM5<2:1> << 7) |
(CIMM1 << 9));
}}, {{
if (imm == 0) {
return std::make_shared<IllegalInstFault>(
"immediate = 0", machInst);
}
sp_sd = rvSext(sp_sd + imm);
}});
default: CIOp::c_lui({{
imm = sext<6>(CIMM5 | (CIMM1 << 5)) << 12;
}}, {{
// RC1 == 0 is HINT
if (imm == 0) {
return std::make_shared<IllegalInstFault>(
"immediate = 0", machInst);
}
Rc1_sd = imm;
}});
}
0x4: decode CFUNCT2HIGH {
format CIOp {
0x0: c_srli({{
    // 6-bit shift amount: CIMM1 supplies bit 5, CIMM5 bits 4:0.
    imm = CIMM5 | (CIMM1 << 5);
}}, {{
    // rvSelect picks the first argument on RV32: a set shamt[5] is
    // illegal there, while RV64 accepts the full 6-bit amount.
    if (rvSelect((bool)CIMM1, false)) {
        return std::make_shared<IllegalInstFault>(
                "shmat[5] != 0", machInst);
    }
    // imm == 0 encodes C.SRLI64, a HINT for RV32/RV64. In that case the
    // value is not shifted and its MSB may be set, so extend the result
    // the same way the non-compressed srli entry does.
    Rp1 = rvSext(rvZext(Rp1) >> imm);
}}, uint64_t);
0x1: c_srai({{
imm = CIMM5 | (CIMM1 << 5);
}}, {{
if (rvSelect((bool)CIMM1, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
if (imm == 0) {
// C.SRAI64, HINT for RV32/RV64
}
Rp1_sd = rvSext(Rp1_sd) >> imm;
}}, uint64_t);
0x2: c_andi({{
    // CIMM1 acts as the sign bit of the 6-bit immediate: when it is
    // set, every bit above CIMM5's five bits is filled with ones.
    imm = (CIMM1 > 0) ? (CIMM5 | ~((uint64_t)0x1F)) : CIMM5;
}}, {{
    Rp1 = rvSext(Rp1 & imm);
}}, uint64_t);
}
format CompressedROp {
0x3: decode CFUNCT1 {
0x0: decode CFUNCT2LOW {
0x0: c_sub({{
Rp1 = rvSext(Rp1 - Rp2);
}});
0x1: c_xor({{
Rp1 = rvSext(Rp1 ^ Rp2);
}});
0x2: c_or({{
Rp1 = rvSext(Rp1 | Rp2);
}});
0x3: c_and({{
Rp1 = rvSext(Rp1 & Rp2);
}});
}
0x1: decode CFUNCT2LOW {
0x0: decode RVTYPE {
0x1: c_subw({{
Rp1_sd = (int32_t)Rp1_sd - Rp2_sw;
}});
}
0x1: decode RVTYPE {
0x1: c_addw({{
Rp1_sd = (int32_t)Rp1_sd + Rp2_sw;
}});
}
0x2: c_mul({{
Rp1_sd = rvSext(Rp1_sd * Rp2_sd);
}}, IntMultOp);
0x3: decode RP2 {
0x0: c_zext_b({{
Rp1 = Rp1 & 0xFFULL;
}});
0x1: c_sext_b({{
Rp1 = sext<8>(Rp1 & 0xFFULL);
}});
0x2: c_zext_h({{
Rp1 = Rp1 & 0xFFFFULL;
}});
0x3: c_sext_h({{
Rp1 = sext<16>(Rp1 & 0xFFFFULL);
}});
0x4: decode RVTYPE {
0x1: c_zext_w({{
Rp1 = bits(Rp1, 31, 0);
}});
}
0x5: c_not({{
Rp1 = ~Rp1;
}});
}
}
}
}
}
0x5: CJOp::c_j({{
NPC = rvZext(PC + imm);
}}, IsDirectControl, IsUncondControl);
format CBOp {
0x6: c_beqz({{
if (rvSext(Rp1) == 0)
NPC = rvZext(PC + imm);
else
NPC = NPC;
}}, IsDirectControl, IsCondControl);
0x7: c_bnez({{
if (rvSext(Rp1) != 0)
NPC = rvZext(PC + imm);
else
NPC = NPC;
}}, IsDirectControl, IsCondControl);
}
}
0x2: decode COPCODE {
0x0: CIOp::c_slli({{
imm = CIMM5 | (CIMM1 << 5);
}}, {{
if (rvSelect((bool)CIMM1, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
if (imm == 0) {
// C.SLLI64, HINT for RV32/RV64
}
// RC1 == 0 is HINT
Rc1 = rvSext(Rc1 << imm);
}}, uint64_t);
format CompressedLoad {
0x1: c_fldsp({{
offset = CIMM5<4:3> << 3 |
CIMM1 << 5 |
CIMM5<2:0> << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>("FPU is off",
machInst);
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
Fc1_bits = Mem;
}}, {{
EA = rvZext(sp + offset);
}});
0x2: c_lwsp({{
offset = CIMM5<4:2> << 2 |
CIMM1 << 5 |
CIMM5<1:0> << 6;
}}, {{
if (RC1 == 0) {
return std::make_shared<IllegalInstFault>(
"source reg x0", machInst);
}
Rc1_sw = Mem_sw;
}}, {{
EA = rvZext(sp + offset);
}});
0x3: decode RVTYPE {
0x0: c_flwsp({{
offset = CIMM5<4:2> << 2 |
CIMM1 << 5 |
CIMM5<1:0> << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>("FPU is off",
machInst);
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
freg_t fd;
fd = freg(f32(Mem_uw));
Fd_bits = fd.v;
}}, {{
EA = (uint32_t)(sp_uw + offset);
}});
0x1: c_ldsp({{
offset = CIMM5<4:3> << 3 |
CIMM1 << 5 |
CIMM5<2:0> << 6;
}}, {{
if (RC1 == 0) {
return std::make_shared<IllegalInstFault>(
"source reg x0", machInst);
}
Rc1_sd = Mem_sd;
}}, {{
EA = sp + offset;
}});
}
}
0x4: decode CFUNCT1 {
0x0: decode RC2 {
0x0: Jump::c_jr({{
    // RC1 == 0 is rejected as an illegal instruction.
    if (RC1 == 0) {
        return std::make_shared<IllegalInstFault>(
                "source reg x0", machInst);
    }
    // C.JR expands to jalr x0, 0(rs1), and JALR clears the least
    // significant bit of the computed target address.
    NPC = rvZext(Rc1 & ~UINT64_C(1));
}}, IsIndirectControl, IsUncondControl);
default: CROp::c_mv({{
// RC1 == 0 is HINT
Rc1 = rvSext(Rc2);
}});
}
0x1: decode RC2 {
0x0: decode RC1 {
0x0: SystemOp::c_ebreak({{
return std::make_shared<BreakpointFault>(
xc->pcState());
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
default: Jump::c_jalr({{
    // Link: ra receives the address of the following instruction.
    ra = rvSext(NPC);
    // C.JALR expands to jalr x1, 0(rs1), and JALR clears the least
    // significant bit of the computed target address.
    NPC = rvZext(Rc1 & ~UINT64_C(1));
}}, IsIndirectControl, IsUncondControl, IsCall);
}
default: CompressedROp::c_add({{
// RC1 == 0 is HINT
Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
}});
}
}
format CompressedStore {
0x5: c_fsdsp({{
offset = CIMM6<5:3> << 3 |
CIMM6<2:0> << 6;
}}, {{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>("FPU is off",
machInst);
Mem_ud = Fc2_bits;
}}, {{
EA = rvZext(sp + offset);
}});
0x6: c_swsp({{
offset = CIMM6<5:2> << 2 |
CIMM6<1:0> << 6;
}}, {{
Mem_uw = Rc2_uw;
}}, {{
EA = rvZext(sp + offset);
}});
0x7: decode RVTYPE {
0x0: c_fswsp({{
    // Word-scaled offset: bits 5:2 <- CIMM6<5:2>, bits 7:6 <- CIMM6<1:0>.
    offset = CIMM6<5:2> << 2 |
             CIMM6<1:0> << 6;
}}, {{
    // Floating-point stores fault while the FPU is disabled.
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    if (status.fs == FPUStatus::OFF)
        return std::make_shared<IllegalInstFault>("FPU is off",
                                                  machInst);
    // The store data comes from the compressed register operand Fc2,
    // exactly as in c_fsdsp. Fs2 is the operand used by the 32-bit
    // fsw/fsd entries and does not name this instruction's data register.
    Mem_uw = unboxF32(boxF32(Fc2_bits));
}}, {{
    // This entry is decoded under RVTYPE 0x0; the address is
    // truncated to 32 bits.
    EA = (uint32_t)(sp_uw + offset);
}});
0x1: c_sdsp({{
offset = CIMM6<5:3> << 3 |
CIMM6<2:0> << 6;
}}, {{
Mem = Rc2;
}}, {{
EA = sp + offset;
}});
}
}
}
0x3: decode OPCODE5 {
0x00: decode FUNCT3 {
format Load {
0x0: lb({{
Rd_sd = Mem_sb;
}});
0x1: lh({{
Rd_sd = Mem_sh;
}});
0x2: lw({{
Rd_sd = Mem_sw;
}});
0x3: decode RVTYPE {
0x1: ld({{
Rd_sd = Mem_sd;
}});
}
0x4: lbu({{
Rd = Mem_ub;
}});
0x5: lhu({{
Rd = Mem_uh;
}});
0x6: decode RVTYPE {
0x1: lwu({{
Rd = Mem_uw;
}});
}
}
}
0x01: decode FUNCT3 {
format Load {
0x1: flh({{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>(
"FPU is off", machInst);
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
freg_t fd;
fd = freg(f16(Mem_uh));
Fd_bits = fd.v;
}}, inst_flags=FloatMemReadOp);
0x2: flw({{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>(
"FPU is off", machInst);
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
freg_t fd;
fd = freg(f32(Mem_uw));
Fd_bits = fd.v;
}}, inst_flags=FloatMemReadOp);
0x3: fld({{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>(
"FPU is off", machInst);
status.fs = FPUStatus::DIRTY;
xc->setMiscReg(MISCREG_STATUS, status);
freg_t fd;
fd = freg(f64(Mem));
Fd_bits = fd.v;
}}, inst_flags=FloatMemReadOp);
}
0x0: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle8_v({{
// Per-element body for an 8-bit unit-stride vector load.
// The byte is taken from memory only when the element is enabled
// (machInst.vm is set, or elem_mask(v0, ei) is true) and its index i
// lies below this microop's microVl.
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
// Otherwise the element is copied from Vs2.
// NOTE(review): Vs2 presumably carries the previous destination
// contents here -- confirm against the VleOp format definition.
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e8_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re8_v({{
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x1: vl2re8_v({{
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x3: vl4re8_v({{
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x7: vl8re8_v({{
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
0x0b: VlmOp::vlm_v({{
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
}}, inst_flags=VectorUnitStrideMaskLoadOp);
0x10: VleOp::vle8ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
} else {
Vd_ub[i] = Vs2_ub[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei8_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_ub[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
0x2: VlStrideOp::vlse8_v({{
Vd_ub[microIdx] = Mem_vc.as<uint8_t>()[0];
}}, inst_flags=VectorStridedLoadOp);
0x3: VlIndexOp::vloxei8_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_ub[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
}
0x5: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle16_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e16_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re16_v({{
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x1: vl2re16_v({{
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x3: vl4re16_v({{
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x7: vl8re16_v({{
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
0x10: VleOp::vle16ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
} else {
Vd_uh[i] = Vs2_uh[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei16_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_uh[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
0x2: VlStrideOp::vlse16_v({{
Vd_uh[microIdx] = Mem_vc.as<uint16_t>()[0];
}}, inst_flags=VectorStridedLoadOp);
0x3: VlIndexOp::vloxei16_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_uh[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
}
0x6: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle32_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e32_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re32_v({{
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x1: vl2re32_v({{
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x3: vl4re32_v({{
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x7: vl8re32_v({{
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
0x10: VleOp::vle32ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
} else {
Vd_uw[i] = Vs2_uw[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei32_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_uw[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
0x2: VlStrideOp::vlse32_v({{
Vd_uw[microIdx] = Mem_vc.as<uint32_t>()[0];
}}, inst_flags=VectorStridedLoadOp);
0x3: VlIndexOp::vloxei32_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_uw[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
}
0x7: decode MOP {
0x0: decode LUMOP {
0x00: decode NF {
0x00: VleOp::vle64_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideLoadOp);
format VlSegOp {
0x01: vlseg2e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x02: vlseg3e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x03: vlseg4e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x04: vlseg5e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x05: vlseg6e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x06: vlseg7e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
0x07: vlseg8e64_v({{
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
i < this->microVl) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
}
}
0x08: decode NF {
format VlWholeOp {
0x0: vl1re64_v({{
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x1: vl2re64_v({{
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x3: vl4re64_v({{
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
0x7: vl8re64_v({{
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
}}, inst_flags=VectorWholeRegisterLoadOp);
}
}
0x10: VleOp::vle64ff_v({{
if ((machInst.vm || elem_mask(v0, ei)) &&
i < this->microVl && i < this->faultIdx) {
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
} else {
Vd_ud[i] = Vs2_ud[i];
}
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
}
0x1: VlIndexOp::vluxei64_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_ud[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
0x2: VlStrideOp::vlse64_v({{
Vd_ud[microIdx] = Mem_vc.as<uint64_t>()[0];
}}, inst_flags=VectorStridedLoadOp);
0x3: VlIndexOp::vloxei64_v({{
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
}}, {{
EA = Rs1 + Vs2_ud[vs2ElemIdx];
}}, inst_flags=VectorIndexedLoadOp);
}
}
0x03: decode FUNCT3 {
format FenceOp {
0x0: fence({{
}}, uint64_t, IsReadBarrier, IsWriteBarrier, No_OpClass);
0x1: fence_i({{
}}, uint64_t, IsNonSpeculative, IsSerializeAfter,
IsSquashAfter, No_OpClass);
}
0x2: decode FUNCT12 {
format CBMOp {
0x0: cbo_inval({{
Mem = 0;
}}, mem_flags=[INVALIDATE, DST_POC]);
0x1: cbo_clean({{
Mem = 0;
}}, mem_flags=[CLEAN, DST_POC]);
0x2: cbo_flush({{
Mem = 0;
}}, mem_flags=[CLEAN, INVALIDATE, DST_POC]);
0x4: cbo_zero({{
Mem = 0;
}}, mem_flags=[CACHE_BLOCK_ZERO]);
}
}
}
0x04: decode FUNCT3 {
0x1: decode FS3 {
format IOp {
0x00: slli({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
Rd = rvSext(Rs1 << imm);
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0x01: decode RVTYPE {
0x0: zip({{
Rd_sw = _rvk_emu_zip_32(Rs1_sw);
}}, imm_code = {{ imm = SHAMT5; }});
}
0x02: decode FS2 {
0x0: sha256sum0({{
Rd_sw = _rvk_emu_sha256sum0(Rs1_sw);
}});
0x1: sha256sum1({{
Rd_sw = _rvk_emu_sha256sum1(Rs1_sw);
}});
0x2: sha256sig0({{
Rd_sw = _rvk_emu_sha256sig0(Rs1_sw);
}});
0x3: sha256sig1({{
Rd_sw = _rvk_emu_sha256sig1(Rs1_sw);
}});
0x4: decode RVTYPE {
0x1: sha512sum0({{
Rd_sd = _rvk_emu_sha512sum0(Rs1_sd);
}});
}
0x5: decode RVTYPE {
0x1: sha512sum1({{
Rd_sd = _rvk_emu_sha512sum1(Rs1_sd);
}});
}
0x6: decode RVTYPE {
0x1: sha512sig0({{
Rd_sd = _rvk_emu_sha512sig0(Rs1_sd);
}});
}
0x7: decode RVTYPE {
0x1: sha512sig1({{
Rd_sd = _rvk_emu_sha512sig1(Rs1_sd);
}});
}
0x8: sm3p0({{
Rd_sw = _rvk_emu_sm3p0(Rs1_sw);
}});
0x9: sm3p1({{
Rd_sw = _rvk_emu_sm3p1(Rs1_sw);
}});
}
0x05: bseti({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
Rd = rvSext(Rs1 | (UINT64_C(1) << index));
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0x06: decode BIT24 {
0x0: decode RVTYPE {
0x1: aes64im({{
Rd_sd = _rvk_emu_aes64im(Rs1_sd);
}});
}
0x1: decode RVTYPE {
0x1: aes64ks1i({{
Rd_sd = _rvk_emu_aes64ks1i(Rs1_sd, imm);
}}, imm_type = int32_t, imm_code={{ imm = RNUM; }});
}
}
0x09: bclri({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
Rd = rvSext(Rs1 & (~(UINT64_C(1) << index)));
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0x0d: binvi({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
Rd = rvSext(Rs1 ^ (UINT64_C(1) << index));
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
}
format ROp {
0x0c: decode RS2 {
0x00: clz({{
Rd = (machInst.rv_type == RV32) ? clz32(Rs1) : clz64(Rs1);
}});
0x01: ctz({{
Rd = (machInst.rv_type == RV32) ? ctz32(Rs1) : ctz64(Rs1);
}});
0x02: cpop({{
Rd = (machInst.rv_type == RV32) ? popCount(Rs1<31:0>) : popCount(Rs1);
}});
0x04: sext_b({{
Rd = sext<8>(Rs1_ub);
}});
0x05: sext_h({{
Rd = sext<16>(Rs1_uh);
}});
}
}
}
format IOp {
0x0: addi({{
Rd_sd = rvSext(Rs1_sd + imm);
}});
0x2: slti({{
Rd = (rvSext(Rs1_sd) < imm) ? 1 : 0;
}});
0x3: sltiu({{
Rd = (rvZext(Rs1) < imm) ? 1 : 0;
}}, uint64_t, imm_code = {{ imm = rvZext(sext<12>(IMM12)); }});
0x4: xori({{
Rd = rvSext(Rs1 ^ imm);
}}, uint64_t);
0x5: decode FS3 {
0x0: srli({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
Rd = rvSext(rvZext(Rs1) >> imm);
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0x1: decode RVTYPE {
0x0: unzip({{
Rd_sw = _rvk_emu_unzip_32(Rs1_sw);
}}, imm_code = {{ imm = SHAMT5; }});
}
0x5: orc_b({{
    // Each byte of Rs1 with at least one bit set becomes 0xff in the
    // result; all-zero bytes stay 0x00. Walk the eight byte lanes by
    // their least significant bit position.
    uint64_t result = 0;
    for (int lsb = 0; lsb < 64; lsb += 8) {
        if (bits(Rs1, lsb + 7, lsb) != 0)
            result |= UINT64_C(0xff) << lsb;
    }
    Rd = rvSext(result);
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0x8: srai({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
Rd_sd = rvSext(Rs1_sd) >> imm;
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0x9: bexti({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
Rd = (Rs1 >> index) & 0x1;
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0xc: rori({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
uint64_t xlen = rvSelect(32, 64);
Rd = rvSext((rvZext(Rs1) >> imm)
| (Rs1 << ((xlen - imm) & (xlen - 1))));
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0xd: decode RS2 {
0x18: ROp::rev8({{
if (rvSelect((bool)SHAMT6BIT5, false)) {
return std::make_shared<IllegalInstFault>(
"shmat[5] != 0", machInst);
}
if (machInst.rv_type == RV32) {
Rd_sd = _rvk_emu_grev_32(Rs1_sd, 0x18);
} else {
Rd_sd = _rvk_emu_grev_64(Rs1_sd, 0x38);
}
}});
0x07: ROp::brev8({{
if (machInst.rv_type == RV32) {
Rd_sd = _rvk_emu_brev8_32(Rs1_sd);
} else {
Rd_sd = _rvk_emu_brev8_64(Rs1_sd);
}
}});
}
}
0x6: ori({{
Rd = rvSext(Rs1 | imm);
}}, uint64_t);
0x7: andi({{
Rd = rvSext(Rs1 & imm);
}}, uint64_t);
}
}
0x05: UOp::auipc({{
Rd = rvSext(PC + (sext<20>(imm) << 12));
}});
0x06: decode RVTYPE {
0x1: decode FUNCT3 {
format IOp {
0x0: addiw({{
Rd_sw = (int32_t)(Rs1_sw + imm);
}}, int32_t);
0x1: decode FS3 {
0x0: slliw({{
Rd_sd = Rs1_sw << imm;
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
0x1: slli_uw({{
Rd = ((uint64_t)(Rs1_uw)) << imm;
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
0xc: decode FS2 {
0x0: clzw({{
Rd = clz32(Rs1);
}});
0x1: ctzw({{
Rd = ctz32(Rs1);
}});
0x2: cpopw({{
Rd = popCount(Rs1<31:0>);
}});
}
}
0x5: decode FS3 {
0x0: srliw({{
Rd_sd = (int32_t)(Rs1_uw >> imm);
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
0x8: sraiw({{
Rd_sd = Rs1_sw >> imm;
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
0xc: roriw({{
Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1))));
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
}
}
}
}
0x08: decode FUNCT3 {
format Store {
0x0: sb({{
Mem_ub = Rs2_ub;
}});
0x1: sh({{
Mem_uh = Rs2_uh;
}});
0x2: sw({{
Mem_uw = Rs2_uw;
}});
0x3: decode RVTYPE {
0x1: sd({{
Mem_ud = Rs2_ud;
}});
}
}
}
0x09: decode FUNCT3 {
// Scalar floating-point stores. Every entry first reads MISCREG_STATUS and
// returns an IllegalInstFault ("FPU is off") when status.fs is OFF.
format Store {
// fsh: stores the 16-bit value obtained by boxing and then unboxing Fs2.
0x1: fsh({{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>(
"FPU is off", machInst);
Mem_uh = unboxF16(boxF16(Fs2_bits));
}}, inst_flags=FloatMemWriteOp);
// fsw: same pattern with the 32-bit box/unbox helpers.
0x2: fsw({{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>(
"FPU is off", machInst);
Mem_uw = unboxF32(boxF32(Fs2_bits));
}}, inst_flags=FloatMemWriteOp);
// fsd: stores the raw 64-bit register contents unchanged.
0x3: fsd({{
STATUS status = xc->readMiscReg(MISCREG_STATUS);
if (status.fs == FPUStatus::OFF)
return std::make_shared<IllegalInstFault>(
"FPU is off", machInst);
Mem_ud = Fs2_bits;
}}, inst_flags=FloatMemWriteOp);
}
// Vector stores of 8-bit elements, selected by MOP: 0x0 is the unit-stride
// family (split further by SUMOP and NF), 0x1 vsuxei8_v, 0x2 vsse8_v and
// 0x3 vsoxei8_v.
0x0: decode MOP {
0x0: decode SUMOP {
0x00: decode NF {
// NF == 0: plain unit-stride store; element i of Vs3 goes to slot i of Mem.
0x00: VseOp::vse8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideStoreOp);
// NF 1..7 select the segment stores vsseg2e8_v .. vsseg8e8_v. All share
// the same per-element copy; the segment handling itself is not visible
// here and presumably lives in the VsSegOp format.
format VsSegOp {
0x01: vsseg2e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e8_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
// SUMOP 0x8: whole-register stores. Only NF values 0x0, 0x1, 0x3 and 0x7
// are decoded (vs1r_v, vs2r_v, vs4r_v, vs8r_v).
format VsWholeOp {
0x8: decode NF {
0x0: vs1r_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorWholeRegisterStoreOp);
0x1: vs2r_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorWholeRegisterStoreOp);
0x3: vs4r_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorWholeRegisterStoreOp);
0x7: vs8r_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorWholeRegisterStoreOp);
}
}
// SUMOP 0x0b: vsm_v, flagged as a unit-stride mask store.
0x0b: VsmOp::vsm_v({{
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
}}, inst_flags=VectorUnitStrideMaskStoreOp);
}
// Indexed stores: the first code block writes one element of Vs3, the
// second computes EA as Rs1 plus the 8-bit index read from Vs2.
0x1: VsIndexOp::vsuxei8_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_ub[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
// Strided store: writes the Vs3 element selected by microIdx.
0x2: VsStrideOp::vsse8_v({{
Mem_vc.as<uint8_t>()[0] = Vs3_ub[microIdx];
}}, inst_flags=VectorStridedStoreOp);
0x3: VsIndexOp::vsoxei8_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_ub[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
}
// Vector stores of 16-bit elements, selected by MOP. This width has no
// whole-register or mask store entries under SUMOP.
0x5: decode MOP {
0x0: decode SUMOP {
0x00: decode NF {
// NF == 0: plain unit-stride store of 16-bit elements.
0x00: VseOp::vse16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideStoreOp);
// NF 1..7: segment stores vsseg2e16_v .. vsseg8e16_v, all with the same
// per-element copy.
format VsSegOp {
0x01: vsseg2e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e16_v({{
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
}
// Indexed stores: EA is Rs1 plus the 16-bit index read from Vs2.
0x1: VsIndexOp::vsuxei16_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_uh[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
// Strided store: writes the Vs3 element selected by microIdx.
0x2: VsStrideOp::vsse16_v({{
Mem_vc.as<uint16_t>()[0] = Vs3_uh[microIdx];
}}, inst_flags=VectorStridedStoreOp);
0x3: VsIndexOp::vsoxei16_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_uh[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
}
// Vector stores of 32-bit elements, selected by MOP. This width has no
// whole-register or mask store entries under SUMOP.
0x6: decode MOP {
0x0: decode SUMOP {
0x00: decode NF {
// NF == 0: plain unit-stride store of 32-bit elements.
0x00: VseOp::vse32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideStoreOp);
// NF 1..7: segment stores vsseg2e32_v .. vsseg8e32_v, all with the same
// per-element copy.
format VsSegOp {
0x01: vsseg2e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e32_v({{
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
}
// Indexed stores: EA is Rs1 plus the 32-bit index read from Vs2.
0x1: VsIndexOp::vsuxei32_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_uw[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
// Strided store: writes the Vs3 element selected by microIdx.
0x2: VsStrideOp::vsse32_v({{
Mem_vc.as<uint32_t>()[0] = Vs3_uw[microIdx];
}}, inst_flags=VectorStridedStoreOp);
0x3: VsIndexOp::vsoxei32_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_uw[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
}
// Vector stores of 64-bit elements, selected by MOP. This width has no
// whole-register or mask store entries under SUMOP.
0x7: decode MOP {
0x0: decode SUMOP {
0x00: decode NF {
// NF == 0: plain unit-stride store of 64-bit elements.
0x00: VseOp::vse64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideStoreOp);
// NF 1..7: segment stores vsseg2e64_v .. vsseg8e64_v, all with the same
// per-element copy.
format VsSegOp {
0x01: vsseg2e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x02: vsseg3e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x03: vsseg4e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x04: vsseg5e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x05: vsseg6e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x06: vsseg7e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
0x07: vsseg8e64_v({{
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
}
}
}
// Indexed stores: EA is Rs1 plus the 64-bit index read from Vs2.
0x1: VsIndexOp::vsuxei64_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_ud[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
// Strided store: writes the Vs3 element selected by microIdx.
0x2: VsStrideOp::vsse64_v({{
Mem_vc.as<uint64_t>()[0] = Vs3_ud[microIdx];
}}, inst_flags=VectorStridedStoreOp);
0x3: VsIndexOp::vsoxei64_v({{
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
}}, {{
EA = Rs1 + Vs2_ud[vs2ElemIdx];
}}, inst_flags=VectorIndexedStoreOp);
}
}
0x0b: decode FUNCT3 {
0x2: decode AMOFUNCT {
// lr_w: loads the signed 32-bit memory word into Rd; the access carries
// the LLSC memory flag.
0x2: LoadReserved::lr_w({{
Rd_sd = Mem_sw;
}}, mem_flags=LLSC);
// sc_w: first block stores the low word of Rs2; second block writes the
// rvSext'ed `result` to Rd (presumably the success/failure code of the
// conditional store - confirm in the StoreCond format).
0x3: StoreCond::sc_w({{
Mem_uw = Rs2_uw;
}}, {{
Rd = rvSext(result);
}}, inst_flags=IsStoreConditional, mem_flags=LLSC);
// amoadd_w: the first block copies the signed 32-bit memory word into Rd;
// the second block builds the functor that adds the low word of Rs2 to the
// memory word.
0x0: AtomicMemOp::amoadd_w({{
    Rd_sd = Mem_sw;
}}, {{
    TypedAtomicOpFunctor<int32_t> *amo_op =
        new AtomicGenericOp<int32_t>(Rs2_sw,
            [](int32_t* b, int32_t a){
                // Add as unsigned so that wrap-around is well defined;
                // signed 32-bit overflow would be undefined behaviour.
                *b = (int32_t)((uint32_t)*b + (uint32_t)a);
            });
}}, mem_flags=ATOMIC_RETURN_OP);
// Remaining 32-bit AMOs. Every entry has two code blocks: the first copies
// the signed 32-bit memory word into Rd, the second builds an
// AtomicGenericOp whose lambda receives a pointer to the memory value (b)
// and the Rs2 operand (a). The signed variants (amomin_w, amomax_w) use
// int32_t functors, the others uint32_t.
0x1: AtomicMemOp::amoswap_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x4: AtomicMemOp::amoxor_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b ^= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x8: AtomicMemOp::amoor_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b |= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0xc: AtomicMemOp::amoand_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ *b &= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// Signed minimum / maximum.
0x10: AtomicMemOp::amomin_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x14: AtomicMemOp::amomax_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<int32_t> *amo_op =
new AtomicGenericOp<int32_t>(Rs2_sw,
[](int32_t* b, int32_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// Unsigned minimum / maximum.
0x18: AtomicMemOp::amominu_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x1c: AtomicMemOp::amomaxu_w({{
Rd_sd = Mem_sw;
}}, {{
TypedAtomicOpFunctor<uint32_t> *amo_op =
new AtomicGenericOp<uint32_t>(Rs2_uw,
[](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
}
0x3: decode RVTYPE {
0x1: decode AMOFUNCT {
// lr_d: loads the signed 64-bit memory value into Rd with the LLSC flag.
0x2: LoadReserved::lr_d({{
Rd_sd = Mem_sd;
}}, mem_flags=LLSC);
// sc_d: first block stores Rs2; second block writes `result` to Rd
// (presumably the success/failure code - confirm in the StoreCond format).
0x3: StoreCond::sc_d({{
Mem = Rs2;
}}, {{
Rd = result;
}}, mem_flags=LLSC, inst_flags=IsStoreConditional);
// amoadd_d: the first block copies the signed 64-bit memory value into Rd;
// the second block builds the functor that adds Rs2 to the memory value.
0x0: AtomicMemOp::amoadd_d({{
    Rd_sd = Mem_sd;
}}, {{
    TypedAtomicOpFunctor<int64_t> *amo_op =
        new AtomicGenericOp<int64_t>(Rs2_sd,
            [](int64_t* b, int64_t a){
                // Add as unsigned so that wrap-around is well defined;
                // signed 64-bit overflow would be undefined behaviour.
                *b = (int64_t)((uint64_t)*b + (uint64_t)a);
            });
}}, mem_flags=ATOMIC_RETURN_OP);
// Remaining 64-bit AMOs. Every entry has two code blocks: the first copies
// the signed 64-bit memory value into Rd, the second builds an
// AtomicGenericOp whose lambda receives a pointer to the memory value (b)
// and the Rs2 operand (a). The signed variants (amomin_d, amomax_d) use
// int64_t functors, the others uint64_t.
0x1: AtomicMemOp::amoswap_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x4: AtomicMemOp::amoxor_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b ^= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x8: AtomicMemOp::amoor_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b |= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0xc: AtomicMemOp::amoand_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){ *b &= a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// Signed minimum / maximum.
0x10: AtomicMemOp::amomin_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ if (a < *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
0x14: AtomicMemOp::amomax_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<int64_t> *amo_op =
new AtomicGenericOp<int64_t>(Rs2_sd,
[](int64_t* b, int64_t a){ if (a > *b) *b = a; });
}}, mem_flags=ATOMIC_RETURN_OP);
// Unsigned minimum / maximum.
0x18: AtomicMemOp::amominu_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){
if (a < *b) *b = a;
});
}}, mem_flags=ATOMIC_RETURN_OP);
0x1c: AtomicMemOp::amomaxu_d({{
Rd_sd = Mem_sd;
}}, {{
TypedAtomicOpFunctor<uint64_t> *amo_op =
new AtomicGenericOp<uint64_t>(Rs2_ud,
[](uint64_t* b, uint64_t a){
if (a > *b) *b = a;
});
}}, mem_flags=ATOMIC_RETURN_OP);
}
}
}
0x0c: decode FUNCT3 {
format ROp {
0x0: decode KFUNCT5 {
// add / sub / mul. The arithmetic is carried out on the unsigned 64-bit
// reinterpretation of the operands: the low 64 bits of the result are the
// same as for signed arithmetic, but wrap-around is well defined instead
// of being signed-overflow undefined behaviour.
0x00: decode BS {
    0x0: add({{
        Rd = rvSext((uint64_t)Rs1_sd + (uint64_t)Rs2_sd);
    }});
    0x1: sub({{
        Rd = rvSext((uint64_t)Rs1_sd - (uint64_t)Rs2_sd);
    }});
}
0x01: decode BS {
    0x0: mul({{
        Rd = rvSext((uint64_t)Rs1_sd * (uint64_t)Rs2_sd);
    }}, IntMultOp);
}
// Scalar crypto entries. Each one forwards its operands to the matching
// _rvk_emu_* helper. Entries nested under `decode RVTYPE { 0x0: ...` are
// decoded only for RVTYPE 0 and those under `0x1:` only for RVTYPE 1;
// sm4ed and sm4ks carry no RVTYPE restriction.
0x08: decode BS {
0x1: decode RVTYPE {
0x0: sha512sum0r({{
Rd_sw = _rvk_emu_sha512sum0r(Rs1_sw, Rs2_sw);
}});
}
}
0x09: decode BS {
0x1: decode RVTYPE {
0x0: sha512sum1r({{
Rd_sw = _rvk_emu_sha512sum1r(Rs1_sw, Rs2_sw);
}});
}
}
0x0a: decode BS {
0x1: decode RVTYPE {
0x0: sha512sig0l({{
Rd_sw = _rvk_emu_sha512sig0l(Rs1_sw, Rs2_sw);
}});
}
}
0x0b: decode BS {
0x1: decode RVTYPE {
0x0: sha512sig1l({{
Rd_sw = _rvk_emu_sha512sig1l(Rs1_sw, Rs2_sw);
}});
}
}
0x0e: decode BS {
0x1: decode RVTYPE {
0x0: sha512sig0h({{
Rd_sw = _rvk_emu_sha512sig0h(Rs1_sw, Rs2_sw);
}});
}
}
0x0f: decode BS {
0x1: decode RVTYPE {
0x0: sha512sig1h({{
Rd_sw = _rvk_emu_sha512sig1h(Rs1_sw, Rs2_sw);
}});
}
}
// The BSOp entries below pass `bs` to the helper as a third argument.
0x11: decode RVTYPE {
0x0: BSOp::aes32esi({{
Rd_sw = _rvk_emu_aes32esi(Rs1_sw, Rs2_sw, bs);
}});
}
0x13: decode RVTYPE {
0x0: BSOp::aes32esmi({{
Rd_sw = _rvk_emu_aes32esmi(Rs1_sw, Rs2_sw, bs);
}});
}
0x15: decode RVTYPE {
0x0: BSOp::aes32dsi({{
Rd_sw = _rvk_emu_aes32dsi(Rs1_sw, Rs2_sw, bs);
}});
}
0x17: decode RVTYPE {
0x0: BSOp::aes32dsmi({{
Rd_sw = _rvk_emu_aes32dsmi(Rs1_sw, Rs2_sw, bs);
}});
}
0x18: BSOp::sm4ed({{
Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, bs);
}});
// 64-bit AES entries operate on the full signed 64-bit operand views.
0x19: decode BS {
0x0: decode RVTYPE {
0x1: aes64es({{
Rd_sd = _rvk_emu_aes64es(Rs1_sd, Rs2_sd);
}});
}
}
0x1a: BSOp::sm4ks({{
Rd_sw = _rvk_emu_sm4ks(Rs1_sw, Rs2_sw, bs);
}});
0x1b: decode BS {
0x0: decode RVTYPE {
0x1: aes64esm({{
Rd_sd = _rvk_emu_aes64esm(Rs1_sd, Rs2_sd);
}});
}
}
0x1d: decode BS {
0x0: decode RVTYPE {
0x1: aes64ds({{
Rd_sd = _rvk_emu_aes64ds(Rs1_sd, Rs2_sd);
}});
}
}
// KFUNCT5 0x1f is shared: BS 0 selects aes64dsm, BS 1 selects aes64ks2.
0x1f: decode BS {
0x0: decode RVTYPE {
0x1: aes64dsm({{
Rd_sd = _rvk_emu_aes64dsm(Rs1_sd, Rs2_sd);
}});
}
0x1: decode RVTYPE {
0x1: aes64ks2({{
Rd_sd = _rvk_emu_aes64ks2(Rs1_sd, Rs2_sd);
}});
}
}
}
// FUNCT3 0x1: shift-left, high multiply and single-bit operations.
// rvSelect(a, b) picks between its two arguments; here the first is always
// the 32-bit-width variant and the second the 64-bit one.
0x1: decode FUNCT7 {
// sll: shift amount is the low 5 or 6 bits of Rs2.
0x0: sll({{
Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
}});
// mulh: upper half of the signed product, at the active register width.
0x1: mulh({{
if (machInst.rv_type == RV32) {
Rd_sd = mulh<int32_t>(Rs1_sd, Rs2_sd);
} else {
Rd_sd = mulh<int64_t>(Rs1_sd, Rs2_sd);
}
}}, IntMultOp);
// clmul: XOR-accumulates Rs1 shifted left by i for every set bit i of Rs2.
0x5: clmul({{
uint64_t result = 0;
for (int i = 0; i < rvSelect(32, 64); i++) {
if ((Rs2 >> i) & 1) {
result ^= Rs1 << i;
}
}
Rd = rvSext(result);
}});
// bset / bclr / binv: set, clear or invert the bit whose index is Rs2
// masked to the register width.
0x14: bset({{
Rs2 &= rvSelect(32 - 1, 64 - 1);
Rd = rvSext(Rs1 | (UINT64_C(1) << Rs2));
}});
0x24: bclr({{
Rs2 &= rvSelect(32 - 1, 64 - 1);
Rd = rvSext(Rs1 & (~(UINT64_C(1) << Rs2)));
}});
// rol: rotate left; the complementary right-shift count is masked with
// xlen - 1 so a rotate by zero never shifts by the full width.
0x30: rol({{
uint64_t xlen = rvSelect(32, 64);
int shamt = Rs2 & (xlen - 1);
Rd = rvSext((Rs1 << shamt)
| (rvZext(Rs1) >> ((xlen - shamt) & (xlen - 1))));
}});
0x34: binv({{
Rs2 &= rvSelect(32 - 1, 64 - 1);
Rd = rvSext(Rs1 ^ (UINT64_C(1) << Rs2));
}});
}
// FUNCT3 0x2: signed compare, signed-by-unsigned high multiply, reversed
// carry-less multiply, sh1add and xperm4.
0x2: decode FUNCT7 {
// slt: 1 when Rs1 is less than Rs2 as signed values, else 0.
0x0: slt({{
Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
}});
// mulhsu: Rs1 is taken as signed, Rs2 through its default view.
0x1: mulhsu({{
if (machInst.rv_type == RV32) {
Rd_sd = mulhsu<int32_t>(Rs1_sd, Rs2);
} else {
Rd_sd = mulhsu<int64_t>(Rs1_sd, Rs2);
}
}}, IntMultOp);
// clmulr: for every set bit i of Rs2, XORs in the zero-extended Rs1
// shifted right by (xlen - i - 1).
0x5: clmulr({{
uint64_t result = 0;
uint64_t xlen = rvSelect(32, 64);
uint64_t zextRs1 = rvZext(Rs1);
for (int i = 0; i < xlen; i++) {
if ((Rs2 >> i) & 1) {
result ^= zextRs1 >> (xlen-i-1);
}
}
Rd = rvSext(result);
}});
// sh1add: Rs1 shifted left by one, plus Rs2.
0x10: sh1add({{
Rd = rvSext((Rs1 << 1) + Rs2);
}});
// xperm4: delegated to the width-specific _rvk_emu_xperm4 helper.
0x14: xperm4({{
if (machInst.rv_type == RV32) {
Rd_sd = _rvk_emu_xperm4_32(Rs1_sd, Rs2_sd);
} else {
Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
}
}});
}
// FUNCT3 0x3: unsigned compare, unsigned high multiply and the high half
// of the carry-less multiply.
0x3: decode FUNCT7 {
// sltu: 1 when zero-extended Rs1 is below zero-extended Rs2, else 0.
0x0: sltu({{
Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
}});
// mulhu: for RV32 the 32-bit result is cast to a signed 32-bit value so
// that it is sign-extended when written.
0x1: mulhu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)mulhu<uint32_t>(Rs1, Rs2);
} else {
Rd = mulhu<uint64_t>(Rs1, Rs2);
}
}}, IntMultOp);
// clmulh: the loop starts at bit 1 and shifts right by (xlen - i), so the
// top result bit is never set.
0x5: clmulh({{
uint64_t result = 0;
uint64_t xlen = rvSelect(32, 64);
uint64_t zextRs1 = rvZext(Rs1);
for (int i = 1; i < xlen; i++) {
if ((Rs2 >> i) & 1) {
result ^= zextRs1 >> (xlen-i);
}
}
// The MSB can never be 1, no need to sign extend.
Rd = result;
}});
}
// FUNCT3 0x4: xor, signed divide, pack, signed min, sh2add, xperm8, xnor.
0x4: decode FUNCT7 {
0x0: xor({{
Rd = rvSext(Rs1 ^ Rs2);
}});
// div: signed division via the div<> helper at the active register width.
0x1: div({{
if (machInst.rv_type == RV32) {
Rd_sd = div<int32_t>(Rs1, Rs2);
} else {
Rd_sd = div<int64_t>(Rs1, Rs2);
}
}}, IntDivOp);
// pack: low half of Rs2 goes to the upper half of the result, low half
// of Rs1 to the lower half.
0x4: pack({{
int xlen = rvSelect(32, 64);
Rd = rvSext(
(bits(Rs2, xlen/2-1, 0) << (xlen / 2)) | \
bits(Rs1, xlen/2-1, 0)
);
}});
// min: signed minimum of the two rvSext'ed operands.
0x5: min({{
Rd_sd = std::min(rvSext(Rs1_sd), rvSext(Rs2_sd));
}});
// sh2add: Rs1 shifted left by two, plus Rs2.
0x10: sh2add({{
Rd = rvSext((Rs1 << 2) + Rs2);
}});
// xperm8: delegated to the width-specific _rvk_emu_xperm8 helper.
0x14: xperm8({{
if (machInst.rv_type == RV32) {
Rd_sd = _rvk_emu_xperm8_32(Rs1_sd, Rs2_sd);
} else {
Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
}
}});
0x20: xnor({{
Rd = rvSext(~(Rs1 ^ Rs2));
}});
}
// FUNCT3 0x5: right shifts, unsigned divide, unsigned min, bit extract and
// rotate right.
0x5: decode FUNCT7 {
// srl: logical shift of the zero-extended Rs1 by the low 5 or 6 bits
// of Rs2.
0x0: srl({{
Rd = rvSext(rvZext(Rs1) >>
rvSelect(Rs2<4:0>, Rs2<5:0>));
}});
// divu: for RV32 the 32-bit quotient is cast to a signed 32-bit value so
// that it is sign-extended when written.
0x1: divu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)divu<uint32_t>(Rs1, Rs2);
} else {
Rd = divu<uint64_t>(Rs1, Rs2);
}
}}, IntDivOp);
// sra: shift of the rvSext'ed signed Rs1.
0x20: sra({{
Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
}});
// minu: unsigned minimum of the zero-extended operands.
0x5: minu({{
Rd = rvSext(std::min(rvZext(Rs1), rvZext(Rs2)));
}});
// bext: extracts the single bit of Rs1 at the index given by Rs2 masked
// to the register width.
0x24: bext({{
Rs2 &= (rvSelect(32, 64) - 1);
// It doesn't need to sign ext because MSB is always 0
Rd = (Rs1 >> Rs2) & 0x1;
}});
// ror: rotate right; the complementary left-shift count is masked with
// xlen - 1 so a rotate by zero never shifts by the full width.
0x30: ror({{
uint64_t xlen = rvSelect(32, 64);
int shamt = Rs2 & (xlen - 1);
Rd = rvSext((rvZext(Rs1) >> shamt)
| (Rs1 << ((xlen - shamt) & (xlen - 1))));
}});
}
// FUNCT3 0x6: or, signed remainder, signed max, sh3add and orn.
0x6: decode FUNCT7 {
0x0: or({{
Rd = rvSext(Rs1 | Rs2);
}});
// rem: signed remainder via the rem<> helper at the active register width.
0x1: rem({{
if (machInst.rv_type == RV32) {
Rd_sd = rem<int32_t>(Rs1, Rs2);
} else {
Rd_sd = rem<int64_t>(Rs1, Rs2);
}
}}, IntDivOp);
// max: signed maximum of the two rvSext'ed operands.
0x5: max({{
Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
}});
// sh3add: Rs1 shifted left by three, plus Rs2.
0x10: sh3add({{
Rd = rvSext((Rs1 << 3) + Rs2);
}});
// orn: OR with the bitwise complement of Rs2.
0x20: orn({{
Rd = rvSext(Rs1 | (~Rs2));
}});
}
// FUNCT3 0x7: and, unsigned remainder, packh, unsigned max and andn.
0x7: decode FUNCT7 {
0x0: and({{
Rd = rvSext(Rs1 & Rs2);
}});
// remu: for RV32 the 32-bit remainder is cast to a signed 32-bit value so
// that it is sign-extended when written.
0x1: remu({{
if (machInst.rv_type == RV32) {
Rd = (int32_t)remu<uint32_t>(Rs1, Rs2);
} else {
Rd = remu<uint64_t>(Rs1, Rs2);
}
}}, IntDivOp);
// packh: low byte of Rs2 in bits 15..8, low byte of Rs1 in bits 7..0.
0x4: packh({{
// It doesn't need to sign ext as MSB is always 0
Rd = (Rs2_ub << 8) | Rs1_ub;
}});
// maxu: unsigned maximum of the zero-extended operands.
0x5: maxu({{
Rd = rvSext(std::max(rvZext(Rs1), rvZext(Rs2)));
}});
// andn: AND with the bitwise complement of Rs2.
0x20: andn({{
Rd = rvSext(Rs1 & (~Rs2));
}});
}
}
}
// lui: sign-extends the 20-bit imm and shifts it left by 12. Unlike auipc
// the value is written without an rvSext call.
0x0d: UOp::lui({{
Rd = (sext<20>(imm) << 12);
}});
0x0e: decode RVTYPE {
0x1: decode FUNCT3 {
format ROp {
// FUNCT3 0x0 of the word-sized register operations. addw, mulw and subw
// compute on the unsigned 32-bit reinterpretation of the operands and cast
// back to a signed 32-bit value: the low 32 bits are identical to signed
// arithmetic, but wrap-around is well defined instead of being
// signed-overflow undefined behaviour. The signed 32-bit value is then
// sign-extended by the store to the signed 64-bit destination view.
0x0: decode FUNCT7 {
    0x0: addw({{
        Rd_sd = (int32_t)((uint32_t)Rs1_sw + (uint32_t)Rs2_sw);
    }});
    0x1: mulw({{
        Rd_sd = (int32_t)((uint32_t)Rs1_sw * (uint32_t)Rs2_sw);
    }}, IntMultOp);
    // add_uw: the zero-extended low word of Rs1 plus the full Rs2.
    0x4: add_uw({{
        Rd = Rs1_uw + Rs2;
    }});
    0x20: subw({{
        Rd_sd = (int32_t)((uint32_t)Rs1_sw - (uint32_t)Rs2_sw);
    }});
}
// Remaining word-sized register operations (FUNCT3 0x1 .. 0x7). Shift
// amounts come from the low five bits of Rs2.
0x1: decode FUNCT7 {
0x0: sllw({{
Rd_sd = Rs1_sw << Rs2<4:0>;
}});
// rolw: 32-bit rotate left; the right-shift count is masked with 31 so a
// rotate by zero never shifts by 32.
0x30: rolw({{
int shamt = Rs2 & (32 - 1);
Rd = (int32_t) ((Rs1_uw << shamt) | (Rs1_uw >> ((32 - shamt) & (32 - 1))));
}});
}
0x2: decode FUNCT7 {
// sh1add_uw: the unsigned word is widened to 64 bits before the shift.
0x10: sh1add_uw({{
Rd = (((uint64_t)Rs1_uw) << 1) + Rs2;
}});
}
0x4: decode FUNCT7 {
0x1: divw({{
Rd_sd = div<int32_t>(Rs1, Rs2);
}}, IntDivOp);
// packw: low halfword of Rs2 in bits 31..16, low halfword of Rs1 in bits
// 15..0, sign-extended from 32 bits.
0x4: packw({{
Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
}});
0x10: sh2add_uw({{
Rd = (((uint64_t)Rs1_uw) << 2) + Rs2;
}});
}
0x5: decode FUNCT7 {
// srlw: logical shift of the unsigned word; the cast to a signed 32-bit
// value makes the store sign-extend.
0x0: srlw({{
Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
}});
0x1: divuw({{
Rd = sext<32>(divu<uint32_t>(Rs1, Rs2));
}}, IntDivOp);
0x20: sraw({{
Rd_sd = Rs1_sw >> Rs2<4:0>;
}});
// rorw: 32-bit rotate right, same masking as rolw.
0x30: rorw({{
int shamt = Rs2 & (32 - 1);
Rd = (int32_t) ((Rs1_uw >> shamt) | (Rs1_uw << ((32 - shamt) & (32 - 1))));
}});
}
0x6: decode FUNCT7 {
0x1: remw({{
Rd_sd = rem<int32_t>(Rs1, Rs2);
}}, IntDivOp);
0x10: sh3add_uw({{
Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
}});
}
// remuw has no FUNCT7 sub-decode.
0x7: remuw({{
Rd = sext<32>(remu<uint32_t>(Rs1, Rs2));
}}, IntDivOp);
}
}
}
format FPROp {
// Fused multiply-add family (opcodes 0x10 .. 0x13). FUNCT2 selects the
// precision: 0x0 single, 0x1 double, 0x2 half. All variants call
// f*_mulAdd(Fs1, Fs2, Fs3); negation is done by XOR-ing the sign bit
// (mask(31, 31), mask(63, 63) or mask(15, 15)) into an operand first:
//   fmadd  - no operand negated
//   fmsub  - Fs3 negated
//   fnmsub - Fs1 negated
//   fnmadd - Fs1 and Fs3 negated
0x10: decode FUNCT2 {
0x0: fmadd_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_mulAdd(f32(freg(Fs1_bits)),
f32(freg(Fs2_bits)),
f32(freg(Fs3_bits))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x1: fmadd_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_mulAdd(f64(freg(Fs1_bits)),
f64(freg(Fs2_bits)),
f64(freg(Fs3_bits))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x2: fmadd_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_mulAdd(f16(freg(Fs1_bits)),
f16(freg(Fs2_bits)),
f16(freg(Fs3_bits))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
}
0x11: decode FUNCT2 {
0x0: fmsub_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_mulAdd(f32(freg(Fs1_bits)),
f32(freg(Fs2_bits)),
f32(f32(freg(Fs3_bits)).v ^
mask(31, 31))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x1: fmsub_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_mulAdd(f64(freg(Fs1_bits)),
f64(freg(Fs2_bits)),
f64(f64(freg(Fs3_bits)).v ^
mask(63, 63))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x2: fmsub_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_mulAdd(f16(freg(Fs1_bits)),
f16(freg(Fs2_bits)),
f16(f16(freg(Fs3_bits)).v ^
mask(15, 15))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
}
0x12: decode FUNCT2 {
0x0: fnmsub_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_mulAdd(f32(f32(freg(Fs1_bits)).v ^
mask(31, 31)),
f32(freg(Fs2_bits)),
f32(freg(Fs3_bits))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x1: fnmsub_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_mulAdd(f64(f64(freg(Fs1_bits)).v ^
mask(63, 63)),
f64(freg(Fs2_bits)),
f64(freg(Fs3_bits))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x2: fnmsub_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_mulAdd(f16(f16(freg(Fs1_bits)).v ^
mask(15, 15)),
f16(freg(Fs2_bits)),
f16(freg(Fs3_bits))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
}
0x13: decode FUNCT2 {
0x0: fnmadd_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_mulAdd(f32(f32(freg(Fs1_bits)).v ^
mask(31, 31)),
f32(freg(Fs2_bits)),
f32(f32(freg(Fs3_bits)).v ^
mask(31, 31))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x1: fnmadd_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_mulAdd(f64(f64(freg(Fs1_bits)).v ^
mask(63, 63)),
f64(freg(Fs2_bits)),
f64(f64(freg(Fs3_bits)).v ^
mask(63, 63))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
0x2: fnmadd_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_mulAdd(f16(f16(freg(Fs1_bits)).v ^
mask(15, 15)),
f16(freg(Fs2_bits)),
f16(f16(freg(Fs3_bits)).v ^
mask(15, 15))));
Fd_bits = fd.v;
}}, FloatMultAccOp);
}
0x14: decode FUNCT7 {
// Basic FP arithmetic. Every entry invokes RM_REQUIRED, applies the
// matching f32/f64/f16 helper to Fs1 and Fs2 and writes the raw bits of
// the result to Fd. FUNCT7 groups of three cover single, double and half
// precision: 0x0-0x2 add, 0x4-0x6 sub, 0x8-0xa mul, 0xc-0xe div.
0x0: fadd_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_add(f32(freg(Fs1_bits)),
f32(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatAddOp);
0x1: fadd_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_add(f64(freg(Fs1_bits)),
f64(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatAddOp);
0x2: fadd_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_add(f16(freg(Fs1_bits)),
f16(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatAddOp);
0x4: fsub_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_sub(f32(freg(Fs1_bits)),
f32(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatAddOp);
0x5: fsub_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_sub(f64(freg(Fs1_bits)),
f64(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatAddOp);
0x6: fsub_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_sub(f16(freg(Fs1_bits)),
f16(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatAddOp);
0x8: fmul_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_mul(f32(freg(Fs1_bits)),
f32(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatMultOp);
0x9: fmul_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_mul(f64(freg(Fs1_bits)),
f64(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatMultOp);
0xa: fmul_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_mul(f16(freg(Fs1_bits)),
f16(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatMultOp);
0xc: fdiv_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_div(f32(freg(Fs1_bits)),
f32(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatDivOp);
0xd: fdiv_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_div(f64(freg(Fs1_bits)),
f64(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatDivOp);
0xe: fdiv_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_div(f16(freg(Fs1_bits)),
f16(freg(Fs2_bits))));
Fd_bits = fd.v;
}}, FloatDivOp);
// Sign injection. ROUND_MODE selects the variant: 0x0 takes the sign of
// Fs2, 0x1 the inverted sign of Fs2, 0x2 the XOR of both signs. The single
// and half versions unbox the operands, replace the sign bit of Fs1 and
// re-box the result.
0x10: decode ROUND_MODE {
0x0: fsgnj_s({{
auto sign = bits(unboxF32(Fs2_bits), 31);
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
sign));
}}, FloatMiscOp);
0x1: fsgnjn_s({{
auto sign = ~bits(unboxF32(Fs2_bits), 31);
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
sign));
}}, FloatMiscOp);
0x2: fsgnjx_s({{
auto sign = bits(
unboxF32(Fs1_bits) ^ unboxF32(Fs2_bits), 31);
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
sign));
}}, FloatMiscOp);
}
// The double versions work the other way round: they start from the value
// that supplies the sign and overwrite bits 62..0 with those of Fs1.
0x11: decode ROUND_MODE {
0x0: fsgnj_d({{
Fd_bits = insertBits(Fs2_bits, 62, 0, Fs1_bits);
}}, FloatMiscOp);
0x1: fsgnjn_d({{
Fd_bits = insertBits(~Fs2_bits, 62, 0, Fs1_bits);
}}, FloatMiscOp);
0x2: fsgnjx_d({{
Fd_bits = insertBits(
Fs1_bits ^ Fs2_bits, 62, 0, Fs1_bits);
}}, FloatMiscOp);
}
0x12: decode ROUND_MODE {
0x0: fsgnj_h({{
auto sign = bits(unboxF16(Fs2_bits), 15);
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
sign));
}}, FloatMiscOp);
0x1: fsgnjn_h({{
auto sign = ~bits(unboxF16(Fs2_bits), 15);
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
sign));
}}, FloatMiscOp);
0x2: fsgnjx_h({{
auto sign = bits(
unboxF16(Fs1_bits) ^ unboxF16(Fs2_bits), 15);
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
sign));
}}, FloatMiscOp);
}
// FP minimum / maximum for single (0x14), double (0x15) and half (0x16)
// precision; ROUND_MODE 0x0 is min, 0x1 is max. Common logic:
//  - the quiet less-than compare decides the ordinary case;
//  - when the operands compare equal, the sign bit breaks the tie (this is
//    what distinguishes the two zeros);
//  - if only Fs2 is NaN, Fs1 is returned; if only Fs1 is NaN, the compares
//    are false and Fs2 is returned;
//  - if both are NaN, the default NaN constant is returned.
0x14: decode ROUND_MODE {
0x0: fmin_s({{
float32_t fs1 = f32(freg(Fs1_bits));
float32_t fs2 = f32(freg(Fs2_bits));
float32_t fd;
bool less = f32_lt_quiet(fs1, fs2) ||
(f32_eq(fs1, fs2) && bits(fs1.v, 31));
fd = less || isNaNF32UI(fs2.v) ? fs1 : fs2;
if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
fd = f32(defaultNaNF32UI);
Fd_bits = freg(fd).v;
}}, FloatCmpOp);
0x1: fmax_s({{
float32_t fs1 = f32(freg(Fs1_bits));
float32_t fs2 = f32(freg(Fs2_bits));
float32_t fd;
bool greater = f32_lt_quiet(fs2, fs1) ||
(f32_eq(fs2, fs1) && bits(fs2.v, 31));
fd = greater || isNaNF32UI(fs2.v) ? fs1: fs2;
if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
fd = f32(defaultNaNF32UI);
Fd_bits = freg(fd).v;
}}, FloatCmpOp);
}
0x15: decode ROUND_MODE {
0x0: fmin_d({{
float64_t fs1 = f64(freg(Fs1_bits));
float64_t fs2 = f64(freg(Fs2_bits));
float64_t fd;
bool less = f64_lt_quiet(fs1, fs2) ||
(f64_eq(fs1, fs2) && bits(fs1.v, 63));
fd = less || isNaNF64UI(fs2.v) ? fs1 : fs2;
if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
fd = f64(defaultNaNF64UI);
Fd_bits = freg(fd).v;
}}, FloatCmpOp);
0x1: fmax_d({{
float64_t fs1 = f64(freg(Fs1_bits));
float64_t fs2 = f64(freg(Fs2_bits));
float64_t fd;
bool greater = f64_lt_quiet(fs2, fs1) ||
(f64_eq(fs2, fs1) && bits(fs2.v, 63));
fd = greater || isNaNF64UI(fs2.v) ? fs1 : fs2;
if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
fd = f64(defaultNaNF64UI);
Fd_bits = freg(fd).v;
}}, FloatCmpOp);
}
0x16: decode ROUND_MODE {
0x0: fmin_h({{
float16_t fs1 = f16(freg(Fs1_bits));
float16_t fs2 = f16(freg(Fs2_bits));
float16_t fd;
bool less = f16_lt_quiet(fs1, fs2) ||
(f16_eq(fs1, fs2) && bits(fs1.v, 15));
fd = less || isNaNF16UI(fs2.v) ? fs1 : fs2;
if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
fd = f16(defaultNaNF16UI);
Fd_bits = freg(fd).v;
}}, FloatCmpOp);
0x1: fmax_h({{
float16_t fs1 = f16(freg(Fs1_bits));
float16_t fs2 = f16(freg(Fs2_bits));
float16_t fd;
bool greater = f16_lt_quiet(fs2, fs1) ||
(f16_eq(fs2, fs1) && bits(fs2.v, 15));
fd = greater || isNaNF16UI(fs2.v) ? fs1 : fs2;
if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
fd = f16(defaultNaNF16UI);
Fd_bits = freg(fd).v;
}}, FloatCmpOp);
}
// Conversions between FP precisions. FUNCT7 picks the destination format
// (0x20 single, 0x21 double, 0x22 half) and CONV_SGN the source format
// (0x0 single, 0x1 double, 0x2 half); the same-format combination is not
// decoded.
0x20: decode CONV_SGN {
0x1: fcvt_s_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_to_f32(f64(freg(Fs1_bits))));
Fd_bits = fd.v;
}}, FloatCvtOp);
0x2: fcvt_s_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_to_f32(f16(freg(Fs1_bits))));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
0x21: decode CONV_SGN {
0x0: fcvt_d_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_to_f64(f32(freg(Fs1_bits))));
Fd_bits = fd.v;
}}, FloatCvtOp);
0x2: fcvt_d_h({{
RM_REQUIRED;
freg_t fd;
fd = freg(f16_to_f64(f16(freg(Fs1_bits))));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
0x22: decode CONV_SGN {
0x0: fcvt_h_s({{
RM_REQUIRED;
freg_t fd;
fd = freg(f32_to_f16(f32(freg(Fs1_bits))));
Fd_bits = fd.v;
}}, FloatCvtOp);
0x1: fcvt_h_d({{
RM_REQUIRED;
freg_t fd;
fd = freg(f64_to_f16(f64(freg(Fs1_bits))));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
// Square roots in single (0x2c), double (0x2d) and half (0x2e) precision.
// Each entry rejects an encoding whose RS2 field is non-zero with an
// IllegalInstFault; the fault message names the violated condition.
0x2c: fsqrt_s({{
    if (RS2 != 0) {
        return std::make_shared<IllegalInstFault>(
                "rs2 != 0", machInst);
    }
    freg_t fd;
    RM_REQUIRED;
    fd = freg(f32_sqrt(f32(freg(Fs1_bits))));
    Fd_bits = fd.v;
}}, FloatSqrtOp);
0x2d: fsqrt_d({{
    if (RS2 != 0) {
        return std::make_shared<IllegalInstFault>(
                "rs2 != 0", machInst);
    }
    freg_t fd;
    RM_REQUIRED;
    fd = freg(f64_sqrt(f64(freg(Fs1_bits))));
    Fd_bits = fd.v;
}}, FloatSqrtOp);
0x2e: fsqrt_h({{
    if (RS2 != 0) {
        return std::make_shared<IllegalInstFault>(
                "rs2 != 0", machInst);
    }
    freg_t fd;
    RM_REQUIRED;
    fd = freg(f16_sqrt(f16(freg(Fs1_bits))));
    Fd_bits = fd.v;
}}, FloatSqrtOp);
// FP comparisons writing the helper's result to an integer register.
// FUNCT7 0x50 / 0x51 / 0x52 select single / double / half precision;
// ROUND_MODE 0x0 is less-or-equal, 0x1 less-than, 0x2 equal.
0x50: decode ROUND_MODE {
0x0: fle_s({{
Rd = f32_le(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
}}, FloatCmpOp);
0x1: flt_s({{
Rd = f32_lt(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
}}, FloatCmpOp);
0x2: feq_s({{
Rd = f32_eq(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
}}, FloatCmpOp);
}
0x51: decode ROUND_MODE {
0x0: fle_d({{
Rd = f64_le(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
}}, FloatCmpOp);
0x1: flt_d({{
Rd = f64_lt(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
}}, FloatCmpOp);
0x2: feq_d({{
Rd = f64_eq(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
}}, FloatCmpOp);
}
0x52: decode ROUND_MODE {
0x0: fle_h({{
Rd = f16_le(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
}}, FloatCmpOp);
0x1: flt_h({{
Rd = f16_lt(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
}}, FloatCmpOp);
0x2: feq_h({{
Rd = f16_eq(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
}}, FloatCmpOp);
}
// FP-to-integer conversions. FUNCT7 0x60 / 0x61 / 0x62 select a single /
// double / half source. CONV_SGN selects the destination: 0x0 signed
// 32-bit, 0x1 unsigned 32-bit, 0x2 signed 64-bit, 0x3 unsigned 64-bit.
// Both 32-bit results are sign-extended from 32 bits; the 64-bit variants
// are only decoded when RVTYPE == 0x1. Every helper receives rm and a
// trailing `true` argument.
0x60: decode CONV_SGN {
0x0: fcvt_w_s({{
RM_REQUIRED;
Rd_sd = sext<32>(f32_to_i32(f32(freg(Fs1_bits)), rm,
true));
}}, FloatCvtOp);
0x1: fcvt_wu_s({{
RM_REQUIRED;
Rd = sext<32>(f32_to_ui32(f32(freg(Fs1_bits)), rm,
true));
}}, FloatCvtOp);
0x2: decode RVTYPE {
0x1: fcvt_l_s({{
RM_REQUIRED;
Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true);
}}, FloatCvtOp);
}
0x3: decode RVTYPE {
0x1: fcvt_lu_s({{
RM_REQUIRED;
Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true);
}}, FloatCvtOp);
}
}
0x61: decode CONV_SGN {
0x0: fcvt_w_d({{
RM_REQUIRED;
Rd_sd = sext<32>(f64_to_i32(f64(freg(Fs1_bits)), rm,
true));
}}, FloatCvtOp);
0x1: fcvt_wu_d({{
RM_REQUIRED;
Rd = sext<32>(f64_to_ui32(f64(freg(Fs1_bits)), rm,
true));
}}, FloatCvtOp);
0x2: decode RVTYPE {
0x1: fcvt_l_d({{
RM_REQUIRED;
Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true);
}}, FloatCvtOp);
}
0x3: decode RVTYPE {
0x1: fcvt_lu_d({{
RM_REQUIRED;
Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true);
}}, FloatCvtOp);
}
}
0x62: decode CONV_SGN {
0x0: fcvt_w_h({{
RM_REQUIRED;
Rd_sd = sext<32>(f16_to_i32(f16(freg(Fs1_bits)), rm,
true));
}}, FloatCvtOp);
0x1: fcvt_wu_h({{
RM_REQUIRED;
Rd = sext<32>(f16_to_ui32(f16(freg(Fs1_bits)), rm,
true));
}}, FloatCvtOp);
0x2: decode RVTYPE {
0x1: fcvt_l_h({{
RM_REQUIRED;
Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true);
}}, FloatCvtOp);
}
0x3: decode RVTYPE {
0x1: fcvt_lu_h({{
RM_REQUIRED;
Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true);
}}, FloatCvtOp);
}
}
// Integer-to-single conversions. CONV_SGN selects the source: 0x0 signed
// 32-bit, 0x1 unsigned 32-bit, 0x2 signed 64-bit, 0x3 unsigned 64-bit.
// The 64-bit sources are only decoded when RVTYPE == 0x1.
0x68: decode CONV_SGN {
    0x0: fcvt_s_w({{
        RM_REQUIRED;
        freg_t fd;
        fd = freg(i32_to_f32(Rs1_sw));
        Fd_bits = fd.v;
    }}, FloatCvtOp);
    0x1: fcvt_s_wu({{
        RM_REQUIRED;
        freg_t fd;
        fd = freg(ui32_to_f32(Rs1_uw));
        Fd_bits = fd.v;
    }}, FloatCvtOp);
    0x2: decode RVTYPE {
        // The signed helper is given the signed operand view, matching
        // fcvt_d_l, rather than relying on an implicit unsigned-to-signed
        // conversion.
        0x1: fcvt_s_l({{
            RM_REQUIRED;
            freg_t fd;
            fd = freg(i64_to_f32(Rs1_sd));
            Fd_bits = fd.v;
        }}, FloatCvtOp);
    }
    0x3: decode RVTYPE {
        0x1: fcvt_s_lu({{
            RM_REQUIRED;
            freg_t fd;
            fd = freg(ui64_to_f32(Rs1));
            Fd_bits = fd.v;
        }}, FloatCvtOp);
    }
}
// Integer-to-double conversions. CONV_SGN selects the source: 0x0 signed
// 32-bit, 0x1 unsigned 32-bit, 0x2 signed 64-bit, 0x3 unsigned 64-bit.
// The 64-bit sources are only decoded when RVTYPE == 0x1.
0x69: decode CONV_SGN {
0x0: fcvt_d_w({{
RM_REQUIRED;
freg_t fd;
fd = freg(i32_to_f64(Rs1_sw));
Fd_bits = fd.v;
}}, FloatCvtOp);
0x1: fcvt_d_wu({{
RM_REQUIRED;
freg_t fd;
fd = freg(ui32_to_f64(Rs1_uw));
Fd_bits = fd.v;
}}, FloatCvtOp);
0x2: decode RVTYPE {
0x1: fcvt_d_l({{
RM_REQUIRED;
freg_t fd;
fd = freg(i64_to_f64(Rs1_sd));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
0x3: decode RVTYPE {
0x1: fcvt_d_lu({{
RM_REQUIRED;
freg_t fd;
fd = freg(ui64_to_f64(Rs1));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
}
// Integer-to-half conversions. CONV_SGN selects the source: 0x0 signed
// 32-bit, 0x1 unsigned 32-bit, 0x2 signed 64-bit, 0x3 unsigned 64-bit.
// The 64-bit sources are only decoded when RVTYPE == 0x1.
0x6a: decode CONV_SGN {
    // The 32-bit operand views already have the helper's argument type,
    // so no cast is needed.
    0x0: fcvt_h_w({{
        RM_REQUIRED;
        freg_t fd;
        fd = freg(i32_to_f16(Rs1_sw));
        Fd_bits = fd.v;
    }}, FloatCvtOp);
    0x1: fcvt_h_wu({{
        RM_REQUIRED;
        freg_t fd;
        fd = freg(ui32_to_f16(Rs1_uw));
        Fd_bits = fd.v;
    }}, FloatCvtOp);
    0x2: decode RVTYPE {
        // The signed helper is given the signed operand view, matching
        // fcvt_d_l, rather than relying on an implicit unsigned-to-signed
        // conversion.
        0x1: fcvt_h_l({{
            RM_REQUIRED;
            freg_t fd;
            fd = freg(i64_to_f16(Rs1_sd));
            Fd_bits = fd.v;
        }}, FloatCvtOp);
    }
    0x3: decode RVTYPE {
        0x1: fcvt_h_lu({{
            RM_REQUIRED;
            freg_t fd;
            fd = freg(ui64_to_f16(Rs1));
            Fd_bits = fd.v;
        }}, FloatCvtOp);
    }
}
// Moves from FP to integer registers and FP classification. FUNCT7
// 0x70 / 0x71 / 0x72 select single / double / half; ROUND_MODE 0x0 is the
// move, 0x1 the classify.
0x70: decode ROUND_MODE {
// fmv_x_w: takes the low 32 bits of the FP register and sign-extends them
// by hand (upper 32 bits set when bit 31 is set).
0x0: fmv_x_w({{
uint64_t result = (uint32_t)Fs1_bits;
if ((result&0x80000000) != 0) {
result |= (0xFFFFFFFFULL << 32);
}
Rd = result;
}}, FloatCvtOp);
0x1: fclass_s({{
Rd = rvSext(f32_classify(f32(freg(Fs1_bits))));
}}, FloatMiscOp);
}
0x71: decode ROUND_MODE {
// fmv_x_d is only decoded when RVTYPE == 0x1.
0x0: decode RVTYPE {
0x1: fmv_x_d({{
Rd = freg(Fs1_bits).v;
}}, FloatCvtOp);
}
0x1: fclass_d({{
Rd = f64_classify(f64(freg(Fs1_bits)));
}}, FloatMiscOp);
}
0x72: decode ROUND_MODE {
// fmv_x_h: takes the low 16 bits and sets the upper 48 bits when bit 15
// is set.
0x0: fmv_x_h({{
uint64_t result = (uint16_t)Fs1_bits;
if ((result&0x8000) != 0) {
result |= (0xFFFFFFFFFFFFULL << 16);
}
Rd = result;
}}, FloatCvtOp);
0x1: fclass_h({{
Rd = f16_classify(f16(freg(Fs1_bits)));
}}, FloatMiscOp);
}
// Moves from integer to FP registers: the low 32 / 64 / 16 bits of Rs1 are
// wrapped with f32 / f64 / f16 and freg and written to Fd. The 64-bit
// variant is only decoded when RVTYPE == 0x1.
0x78: fmv_w_x({{
freg_t fd;
fd = freg(f32(Rs1_uw));
Fd_bits = fd.v;
}}, FloatCvtOp);
0x79: decode RVTYPE {
0x1: fmv_d_x({{
freg_t fd;
fd = freg(f64(Rs1));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
0x7a: fmv_h_x({{
freg_t fd;
fd = freg(f16(Rs1_uh));
Fd_bits = fd.v;
}}, FloatCvtOp);
}
}
0x15: decode FUNCT3 {
// OPIVV
0x0: decode VFUNCT6 {
// Element-wise vector-vector integer operations. Each snippet computes
// destination element i from element i of Vs2 and Vs1; the _vu views are
// used for unsigned and the _vi views for signed comparisons.
format VectorIntFormat {
0x0: vadd_vv({{
Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
0x2: vsub_vv({{
Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
0x4: vminu_vv({{
Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ?
Vs2_vu[i] : Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
0x5: vmin_vv({{
Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ?
Vs2_vi[i] : Vs1_vi[i];
}}, OPIVV, VectorIntegerArithOp);
0x6: vmaxu_vv({{
Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ?
Vs2_vu[i] : Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
0x7: vmax_vv({{
Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ?
Vs2_vi[i] : Vs1_vi[i];
}}, OPIVV, VectorIntegerArithOp);
0x9: vand_vv({{
Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
0xa: vor_vv({{
Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
0xb: vxor_vv({{
Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
}
// vrgather.vv: for every active element of this micro-op, pick a Vs2
// element using the index stored in Vs1[i].
0x0c: VectorGatherFormat::vrgather_vv({{
for (uint32_t i = 0; i < microVl; i++) {
// Element index used for the v0 mask lookup.
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
if (this->vm || elem_mask(v0, ei)) {
// Index rebased to the first element covered by vs2_idx.
const uint64_t idx = Vs1_vu[i]
- vs2_elems * vs2_idx;
// Indices >= vlmax produce 0; indices that fall outside the
// vs2_elems window take Vs3_vu[i] instead.
auto res = (Vs1_vu[i] >= vlmax) ? 0
: (idx < vs2_elems) ? Vs2_vu[idx]
: Vs3_vu[i];
Vd_vu[i] = res;
}
}
}}, OPIVV, VectorMiscOp);
// vrgatherei16.vv: like vrgather.vv, but the indices are read from Vs1
// through its 16-bit (uh) view, offset by vs1_bias; the destination and
// Vs3 are addressed with vd_bias.
0x0e: VectorGatherFormat::vrgatherei16_vv({{
for (uint32_t i = 0; i < microVl; i++) {
// Element index used for the v0 mask lookup.
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
if (this->vm || elem_mask(v0, ei)) {
// Index rebased to the first element covered by vs2_idx.
const uint32_t idx = Vs1_uh[i + vs1_bias]
- vs2_elems * vs2_idx;
// Indices >= vlmax produce 0; indices outside the vs2_elems
// window take the Vs3 value instead.
auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0
: (idx < vs2_elems) ? Vs2_vu[idx]
: Vs3_vu[i + vd_bias];
Vd_vu[i + vd_bias] = res;
}
}
}}, OPIVV, VectorMiscOp);
format VectorIntFormat {
0x10: decode VM {
// vadc.vvm: Vs2 + Vs1 plus the v0 mask bit of element ei.
0x0: vadc_vvm({{
Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i]
+ elem_mask(v0, ei);
}}, OPIVV, VectorIntegerArithOp);
// the unmasked versions (vm=1) are reserved
}
0x12: decode VM {
// vsbc.vvm: Vs2 - Vs1 minus the v0 mask bit of element ei.
0x0: vsbc_vvm({{
Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i]
- elem_mask(v0, ei);
}}, OPIVV, VectorIntegerArithOp);
// the unmasked versions (vm=1) are reserved
}
0x17: decode VM {
// vmerge.vvm: take Vs1[i] where the v0 mask bit is set, else Vs2[i].
0x0: vmerge_vvm({{
Vd_vu[i] = elem_mask(v0, ei)
? Vs1_vu[i]
: Vs2_vu[i];
}}, OPIVV, VectorIntegerArithOp);
// With VM == 1 only the VS2 == 0 encoding is decoded: vmv.v.v copies
// Vs1 to Vd.
0x1: decode VS2 {
0x0: vmv_v_v({{
Vd_vu[i] = Vs1_vu[i];
}}, OPIVV, VectorIntegerArithOp);
}
}
}
format VectorIntVxsatFormat{
    // Saturating add/subtract. Each helper receives vxsatptr alongside
    // the two operands. The unsigned variants work on the vu view, the
    // signed variants on the vi view (matching vsadd_vi).
    0x20: vsaddu_vv({{
        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Vs1_vu[i],
                                vxsatptr);
    }}, OPIVV, VectorIntegerArithOp);
    0x21: vsadd_vv({{
        Vd_vi[i] = sat_add<vi>(Vs2_vi[i], Vs1_vi[i],
                               vxsatptr);
    }}, OPIVV, VectorIntegerArithOp);
    0x22: vssubu_vv({{
        Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Vs1_vu[i],
                                vxsatptr);
    }}, OPIVV, VectorIntegerArithOp);
    0x23: vssub_vv({{
        Vd_vi[i] = sat_sub<vi>(Vs2_vi[i], Vs1_vi[i],
                               vxsatptr);
    }}, OPIVV, VectorIntegerArithOp);
    // vsmul.vv: signed fractional multiply. The double-width product is
    // rounded with the mode held in vxrm, shifted right by sew - 1 and
    // saturated.
    0x27: vsmul_vv({{
        vi max = std::numeric_limits<vi>::max();
        vi min = std::numeric_limits<vi>::min();
        // The only overflowing case handled here is min * min.
        bool overflow = Vs1_vi[i] == Vs2_vi[i] &&
                        Vs1_vi[i] == min;
        __int128_t result = (__int128_t)Vs1_vi[i] *
                            (__int128_t)Vs2_vi[i];
        // Use the vxrm rounding mode, as the scaling shifts
        // (vssrl/vssra) do, instead of a hard-coded mode 0.
        result = int_rounding<__int128_t>(
            result, xc->readMiscReg(MISCREG_VXRM), sew - 1);
        result = result >> (sew - 1);
        if (overflow) {
            result = max;
            *vxsatptr = true;
        }
        Vd_vi[i] = (vi)result;
    }}, OPIVV, VectorIntegerArithOp);
}
format VectorIntFormat {
// Plain shifts: the shift amount is Vs1[i] masked with (sew - 1).
0x25: vsll_vv({{
Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1));
}}, OPIVV, VectorIntegerArithOp);
0x28: vsrl_vv({{
Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1));
}}, OPIVV, VectorIntegerArithOp);
// vsra.vv shifts the signed (vi) view of Vs2.
0x29: vsra_vv({{
Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1));
}}, OPIVV, VectorIntegerArithOp);
// Scaling shifts: the value is widened to 128 bits, passed through
// int_rounding with the mode read from MISCREG_VXRM, then shifted
// right by sh.
0x2a: vssrl_vv({{
int sh = Vs1_vu[i] & (sew - 1);
__uint128_t val = Vs2_vu[i];
val = int_rounding<__uint128_t>(val,
xc->readMiscReg(MISCREG_VXRM), sh);
Vd_vu[i] = val >> sh;
}}, OPIVV, VectorIntegerArithOp);
0x2b: vssra_vv({{
int sh = Vs1_vi[i] & (sew - 1);
__int128_t val = Vs2_vi[i];
val = int_rounding<__int128_t>(val,
xc->readMiscReg(MISCREG_VXRM), sh);
Vd_vi[i] = val >> sh;
}}, OPIVV, VectorIntegerArithOp);
}
format VectorReduceIntWideningFormat {
// Widening sum reductions: reduce_loop combines the wide Vs1 view with
// the SEW-wide Vs2 elements using std::plus on the wide type; the
// result goes to element 0 of Vd.
0x30: vwredsumu_vs({{
Vd_vwu[0] = reduce_loop(std::plus<vwu>(),
Vs1_vwu, Vs2_vu);
}}, OPIVV, VectorIntegerReduceOp);
// Signed variant: the sum uses vwi, but is stored through the vwu view
// of Vd.
0x31: vwredsum_vs({{
Vd_vwu[0] = reduce_loop(std::plus<vwi>(),
Vs1_vwi, Vs2_vi);
}}, OPIVV, VectorIntegerReduceOp);
}
format VectorIntMaskFormat {
// Mask-producing instructions: every entry writes one result bit per
// element into the byte view of Vd via ASSIGN_VD_BIT, at bit position
// i + offset.
0x11: decode VM {
// vmadc.vvm: carry_out of Vs2 + Vs1 with the v0 mask bit as third
// argument.
0x0: vmadc_vvm({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
carry_out(Vs2_vu[i], Vs1_vu[i],
elem_mask(v0, ei)));
}}, OPIVV, VectorIntegerArithOp);
// vmadc.vv: same, without the third argument.
0x1: vmadc_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
carry_out(Vs2_vu[i], Vs1_vu[i]));
}}, OPIVV, VectorIntegerArithOp);
}
0x13: decode VM {
// vmsbc.vvm: borrow_out of Vs2 - Vs1 with the v0 mask bit as third
// argument.
0x0: vmsbc_vvm({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
borrow_out(Vs2_vi[i], Vs1_vi[i],
elem_mask(v0, ei)));
}}, OPIVV, VectorIntegerArithOp);
// vmsbc.vv: same, without the third argument.
0x1: vmsbc_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
borrow_out(Vs2_vi[i], Vs1_vi[i]));
}}, OPIVV, VectorIntegerArithOp);
}
// Integer comparisons; the vu view gives unsigned and the vi view
// signed ordering.
0x18: vmseq_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] == Vs1_vu[i]));
}}, OPIVV, VectorIntegerArithOp);
0x19: vmsne_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] != Vs1_vu[i]));
}}, OPIVV, VectorIntegerArithOp);
0x1a: vmsltu_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] < Vs1_vu[i]));
}}, OPIVV, VectorIntegerArithOp);
0x1b: vmslt_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] < Vs1_vi[i]));
}}, OPIVV, VectorIntegerArithOp);
0x1c: vmsleu_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] <= Vs1_vu[i]));
}}, OPIVV, VectorIntegerArithOp);
0x1d: vmsle_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] <= Vs1_vi[i]));
}}, OPIVV, VectorIntegerArithOp);
}
format VectorIntNarrowingFormat {
    // Narrowing shifts: the 2*SEW-wide Vs2 element is shifted by
    // Vs1[i + offset] masked with (2 * sew - 1), then truncated to SEW.
    0x2c: vnsrl_wv({{
        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
                        ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
    }}, OPIVV, VectorIntegerArithOp);
    0x2d: vnsra_wv({{
        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
                        ((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
    }}, OPIVV, VectorIntegerArithOp);
    // Narrowing clips: round with the mode held in vxrm, shift, then
    // saturate to the SEW-wide range.
    0x2e: vnclipu_wv({{
        vu max = std::numeric_limits<vu>::max();
        // All bits at or above position sew; any of them set after the
        // shift means the value does not fit in vu.
        uint64_t sign_mask =
            std::numeric_limits<uint64_t>::max() << sew;
        __uint128_t res = Vs2_vwu[i];
        unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1);
        // Use the vxrm rounding mode (as vssrl_vv does) rather than a
        // hard-coded mode 0.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
        if (res & sign_mask) {
            res = max;
            // TODO: vxsat
        }
        Vd_vu[i + offset] = (vu)res;
    }}, OPIVV, VectorIntegerArithOp);
    0x2f: vnclip_wv({{
        vi max = std::numeric_limits<vi>::max();
        vi min = std::numeric_limits<vi>::min();
        __int128_t res = Vs2_vwi[i];
        unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1);
        // Use the vxrm rounding mode (as vssra_vv does) rather than a
        // hard-coded mode 0.
        res = int_rounding<__int128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
        if (res < min) {
            res = min;
            // TODO: vxsat
        } else if (res > max) {
            res = max;
            // TODO: vxsat
        }
        Vd_vi[i + offset] = (vi)res;
    }}, OPIVV, VectorIntegerArithOp);
}
}
// OPFVV
0x1: decode VFUNCT6 {
// Floating-point vector-vector arithmetic and reductions. Element bits
// are read through the vu view, wrapped with ftype<et>() for the
// helper, and the helper result's .v payload is stored back.
0x00: VectorFloatFormat::vfadd_vv({{
auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
// Reductions pass a binary lambda to reduce_loop together with Vs1 and
// Vs2; the result is written to element 0 of Vd.
0x01: VectorReduceFloatFormat::vfredusum_vs({{
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
return fadd<et>(ftype<et>(src1), ftype<et>(src2));
}, Vs1_vu, Vs2_vu);
}}, OPFVV, VectorFloatReduceOp);
0x02: VectorFloatFormat::vfsub_vv({{
auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
// vfredosum.vs uses the same snippet as vfredusum.vs.
0x03: VectorReduceFloatFormat::vfredosum_vs({{
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
return fadd<et>(ftype<et>(src1), ftype<et>(src2));
}, Vs1_vu, Vs2_vu);
}}, OPFVV, VectorFloatReduceOp);
0x04: VectorFloatFormat::vfmin_vv({{
auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x05: VectorReduceFloatFormat::vfredmin_vs({{
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
return fmin<et>(ftype<et>(src1), ftype<et>(src2));
}, Vs1_vu, Vs2_vu);
}}, OPFVV, VectorFloatReduceOp);
0x06: VectorFloatFormat::vfmax_vv({{
auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x07: VectorReduceFloatFormat::vfredmax_vs({{
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
return fmax<et>(ftype<et>(src1), ftype<et>(src2));
}, Vs1_vu, Vs2_vu);
}}, OPFVV, VectorFloatReduceOp);
// Sign-injection family: the three entries differ only in the two
// boolean flags handed to fsgnj.
0x08: VectorFloatFormat::vfsgnj_vv({{
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]),
false, false).v;
}}, OPFVV, VectorFloatArithOp);
0x09: VectorFloatFormat::vfsgnjn_vv({{
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]),
true, false).v;
}}, OPFVV, VectorFloatArithOp);
0x0a: VectorFloatFormat::vfsgnjx_vv({{
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]),
false, true).v;
}}, OPFVV, VectorFloatArithOp);
// VWFUNARY0
0x10: decode VS1 {
0x00: decode VM {
// The encodings corresponding to the masked versions
// (vm=0) of vfmv.f.s are reserved
// vfmv.f.s: convert element 0 of Vs2 with freg() and store its
// payload in the scalar register Fd.
0x1: VectorNonSplitFormat::vfmv_f_s({{
freg_t fd = freg(Vs2_vu[0]);
Fd_bits = fd.v;
}}, OPFVV, VectorMiscOp);
}
}
0x12: decode VS1 {
format VectorFloatCvtFormat {
// Same-width float <-> integer conversions. The float-to-integer
// helpers take the rounding mode as their second argument: the
// current softfloat_roundingMode, or softfloat_round_minMag for the
// "rtz" variants.
0x00: vfcvt_xu_f_v({{
Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
softfloat_roundingMode);
}}, OPFVV, VectorFloatConvertOp);
0x01: vfcvt_x_f_v({{
Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
softfloat_roundingMode);
}}, OPFVV, VectorFloatConvertOp);
// Integer-to-float: the helper result's .v payload is stored.
0x02: vfcvt_f_xu_v({{
auto fd = ui_to_f<et>(Vs2_vu[i]);
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
0x03: vfcvt_f_x_v({{
auto fd = i_to_f<et>(Vs2_vu[i]);
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
0x06: vfcvt_rtz_xu_f_v({{
Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
softfloat_round_minMag);
}}, OPFVV, VectorFloatConvertOp);
0x07: vfcvt_rtz_x_f_v({{
Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
softfloat_round_minMag);
}}, OPFVV, VectorFloatConvertOp);
}
format VectorFloatWideningCvtFormat {
// Widening conversions: the source element is read at i + offset from
// the SEW-wide view of Vs2, and the result is written to the wide
// (vwu) view of Vd at index i.
0x08: vfwcvt_xu_f_v({{
Vd_vwu[i] = f_to_wui<et>(
ftype<et>(Vs2_vu[i + offset]),
softfloat_roundingMode);
}}, OPFVV, VectorFloatConvertOp);
0x09: vfwcvt_x_f_v({{
Vd_vwu[i] = f_to_wi<et>(
ftype<et>(Vs2_vu[i + offset]),
softfloat_roundingMode);
}}, OPFVV, VectorFloatConvertOp);
0x0a: vfwcvt_f_xu_v({{
auto fd = ui_to_wf<vu>(Vs2_vu[i + offset]);
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
0x0b: vfwcvt_f_x_v({{
auto fd = i_to_wf<vu>(Vs2_vu[i + offset]);
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
0x0c: vfwcvt_f_f_v({{
auto fd = f_to_wf<et>(
ftype<et>(Vs2_vu[i + offset]));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
// "rtz" variants pass softfloat_round_minMag as the rounding mode.
0x0e: vfwcvt_rtz_xu_f_v({{
Vd_vwu[i] = f_to_wui<et>(
ftype<et>(Vs2_vu[i + offset]),
softfloat_round_minMag);
}}, OPFVV, VectorFloatConvertOp);
0x0f: vfwcvt_rtz_x_f_v({{
Vd_vwu[i] = f_to_wi<et>(
ftype<et>(Vs2_vu[i + offset]),
softfloat_round_minMag);
}}, OPFVV, VectorFloatConvertOp);
}
format VectorFloatNarrowingCvtFormat {
// Narrowing conversions: the source element is read at index i from
// the wide (vwu) view of Vs2, and the result is written to the
// SEW-wide view of Vd at i + offset.
0x10: vfncvt_xu_f_w({{
Vd_vu[i + offset] = f_to_nui<vu>(
ftype<ewt>(Vs2_vwu[i]),
softfloat_roundingMode);
}}, OPFVV, VectorFloatConvertOp);
0x11: vfncvt_x_f_w({{
Vd_vu[i + offset] = f_to_ni<vu>(
ftype<ewt>(Vs2_vwu[i]),
softfloat_roundingMode);
}}, OPFVV, VectorFloatConvertOp);
0x12: vfncvt_f_xu_w({{
auto fd = ui_to_nf<et>(Vs2_vwu[i]);
Vd_vu[i + offset] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
0x13: vfncvt_f_x_w({{
auto fd = i_to_nf<et>(Vs2_vwu[i]);
Vd_vu[i + offset] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
0x14: vfncvt_f_f_w({{
auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
Vd_vu[i + offset] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
// vfncvt.rod.f.f.w: overwrites softfloat_roundingMode with
// softfloat_round_odd before converting.
// NOTE(review): the snippet does not restore the previous mode;
// confirm the surrounding format takes care of that.
0x15: vfncvt_rod_f_f_w({{
softfloat_roundingMode = softfloat_round_odd;
auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
Vd_vu[i + offset] = fd.v;
}}, OPFVV, VectorFloatConvertOp);
// "rtz" variants pass softfloat_round_minMag as the rounding mode.
0x16: vfncvt_rtz_xu_f_w({{
Vd_vu[i + offset] = f_to_nui<vu>(
ftype<ewt>(Vs2_vwu[i]),
softfloat_round_minMag);
}}, OPFVV, VectorFloatConvertOp);
0x17: vfncvt_rtz_x_f_w({{
Vd_vu[i + offset] = f_to_ni<vu>(
ftype<ewt>(Vs2_vwu[i]),
softfloat_round_minMag);
}}, OPFVV, VectorFloatConvertOp);
}
}
// Unary floating-point operations selected by the VS1 field: each
// applies one helper to Vs2[i] and stores the result's .v payload.
0x13: decode VS1 {
format VectorFloatCvtFormat {
0x00: vfsqrt_v({{
auto fd = fsqrt<et>(ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x04: vfrsqrt7_v({{
auto fd = frsqrte7<et>(ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x05: vfrec7_v({{
auto fd = frecip7<et>(ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x10: vfclass_v({{
auto fd = fclassify<et>(ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
}
}
format VectorFloatMaskFormat {
// Floating-point comparisons producing a mask: the boolean helper
// result is stored as bit i + offset of Vd via ASSIGN_VD_BIT.
0x18: vmfeq_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
feq<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i])));
}}, OPFVV, VectorFloatArithOp);
0x19: vmfle_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
fle<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i])));
}}, OPFVV, VectorFloatArithOp);
0x1b: vmflt_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
flt<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i])));
}}, OPFVV, VectorFloatArithOp);
// vmfne.vv is the logical negation of feq.
0x1c: vmfne_vv({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
!feq<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i])));
}}, OPFVV, VectorFloatArithOp);
}
format VectorFloatFormat {
0x20: vfdiv_vv({{
auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x24: vfmul_vv({{
auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
ftype<et>(Vs1_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
// Fused multiply-add family, all built on fmadd<et>(a, b, c) with fneg
// applied to selected arguments. The *madd/*msub forms pass
// (Vs3, Vs1, Vs2); the *macc/*msac forms pass (Vs1, Vs2, Vs3).
0x28: vfmadd_vv({{
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
ftype<et>(Vs1_vu[i]),
ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x29: vfnmadd_vv({{
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
ftype<et>(Vs1_vu[i]),
fneg(ftype<et>(Vs2_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x2a: vfmsub_vv({{
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
ftype<et>(Vs1_vu[i]),
fneg(ftype<et>(Vs2_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x2b: vfnmsub_vv({{
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
ftype<et>(Vs1_vu[i]),
ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x2c: vfmacc_vv({{
auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
ftype<et>(Vs2_vu[i]),
ftype<et>(Vs3_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x2d: vfnmacc_vv({{
auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
ftype<et>(Vs2_vu[i]),
fneg(ftype<et>(Vs3_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x2e: vfmsac_vv({{
auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
ftype<et>(Vs2_vu[i]),
fneg(ftype<et>(Vs3_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x2f: vfnmsac_vv({{
auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
ftype<et>(Vs2_vu[i]),
ftype<et>(Vs3_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
// Widening sum reductions: the accumulator is the wide type (ewt) and
// each SEW-wide Vs2 element is widened with f_to_wf before the add.
// Both entries use the same snippet.
0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{
Vd_vwu[0] = reduce_loop(
[](const vwu& src1, const vu& src2) {
return fadd<ewt>(
ftype<ewt>(src1),
f_to_wf<et>(ftype<et>(src2))
);
}, Vs1_vwu, Vs2_vu);
}}, OPFVV, VectorFloatReduceOp);
0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{
Vd_vwu[0] = reduce_loop(
[](const vwu& src1, const vu& src2) {
return fadd<ewt>(
ftype<ewt>(src1),
f_to_wf<et>(ftype<et>(src2))
);
}, Vs1_vwu, Vs2_vu);
}}, OPFVV, VectorFloatReduceOp);
}
format VectorFloatWideningFormat {
// Widening floating-point arithmetic: SEW-wide sources are read at
// i + offset and widened with fwiden(); the operation runs on the wide
// type (ewt) and the result is stored in the wide view of Vd at i.
0x30: vfwadd_vv({{
auto fd = fadd<ewt>(
fwiden(ftype<et>(Vs2_vu[i + offset])),
fwiden(ftype<et>(Vs1_vu[i + offset])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x32: vfwsub_vv({{
auto fd = fsub<ewt>(
fwiden(ftype<et>(Vs2_vu[i + offset])),
fwiden(ftype<et>(Vs1_vu[i + offset])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
// The ".wv" forms take Vs2 already in the wide format.
0x34: vfwadd_wv({{
auto fd = fadd<ewt>(
ftype<ewt>(Vs2_vwu[i]),
fwiden(ftype<et>(Vs1_vu[i + offset])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x36: vfwsub_wv({{
auto fd = fsub<ewt>(
ftype<ewt>(Vs2_vwu[i]),
fwiden(ftype<et>(Vs1_vu[i + offset])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x38: vfwmul_vv({{
auto fd = fmul<ewt>(
fwiden(ftype<et>(Vs2_vu[i + offset])),
fwiden(ftype<et>(Vs1_vu[i + offset])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
// Widening multiply-accumulate: fmadd<ewt>(Vs1, Vs2, Vs3) with fneg on
// selected arguments; Vs3 is read in the wide format.
0x3c: vfwmacc_vv({{
auto fd = fmadd<ewt>(
fwiden(ftype<et>(Vs1_vu[i + offset])),
fwiden(ftype<et>(Vs2_vu[i + offset])),
ftype<ewt>(Vs3_vwu[i]));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x3d: vfwnmacc_vv({{
auto fd = fmadd<ewt>(
fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
fwiden(ftype<et>(Vs2_vu[i + offset])),
fneg(ftype<ewt>(Vs3_vwu[i])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x3e: vfwmsac_vv({{
auto fd = fmadd<ewt>(
fwiden(ftype<et>(Vs1_vu[i + offset])),
fwiden(ftype<et>(Vs2_vu[i + offset])),
fneg(ftype<ewt>(Vs3_vwu[i])));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
0x3f: vfwnmsac_vv({{
auto fd = fmadd<ewt>(
fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
fwiden(ftype<et>(Vs2_vu[i + offset])),
ftype<ewt>(Vs3_vwu[i]));
Vd_vwu[i] = fd.v;
}}, OPFVV, VectorFloatArithOp);
}
}
// OPMVV
0x2: decode VFUNCT6 {
format VectorReduceIntFormat {
// Single-width integer reductions: reduce_loop is given a binary
// functor plus Vs1 and Vs2, and its result is written to element 0 of
// Vd. Sum and bitwise reductions use the std:: function objects.
0x0: vredsum_vs({{
Vd_vi[0] =
reduce_loop(std::plus<vi>(), Vs1_vi, Vs2_vi);
}}, OPMVV, VectorIntegerReduceOp);
0x1: vredand_vs({{
Vd_vi[0] =
reduce_loop(std::bit_and<vi>(), Vs1_vi, Vs2_vi);
}}, OPMVV, VectorIntegerReduceOp);
0x2: vredor_vs({{
Vd_vi[0] =
reduce_loop(std::bit_or<vi>(), Vs1_vi, Vs2_vi);
}}, OPMVV, VectorIntegerReduceOp);
0x3: vredxor_vs({{
Vd_vi[0] =
reduce_loop(std::bit_xor<vi>(), Vs1_vi, Vs2_vi);
}}, OPMVV, VectorIntegerReduceOp);
// Min/max reductions wrap std::min / std::max in a lambda, on the
// unsigned (vu) or signed (vi) element type.
0x4: vredminu_vs({{
Vd_vu[0] =
reduce_loop([](const vu& src1, const vu& src2) {
return std::min<vu>(src1, src2);
}, Vs1_vu, Vs2_vu);
}}, OPMVV, VectorIntegerReduceOp);
0x5: vredmin_vs({{
Vd_vi[0] =
reduce_loop([](const vi& src1, const vi& src2) {
return std::min<vi>(src1, src2);
}, Vs1_vi, Vs2_vi);
}}, OPMVV, VectorIntegerReduceOp);
0x6: vredmaxu_vs({{
Vd_vu[0] =
reduce_loop([](const vu& src1, const vu& src2) {
return std::max<vu>(src1, src2);
}, Vs1_vu, Vs2_vu);
}}, OPMVV, VectorIntegerReduceOp);
0x7: vredmax_vs({{
Vd_vi[0] =
reduce_loop([](const vi& src1, const vi& src2) {
return std::max<vi>(src1, src2);
}, Vs1_vi, Vs2_vi);
}}, OPMVV, VectorIntegerReduceOp);
}
format VectorIntFormat {
    // Averaging add/subtract: the sum or difference is formed in 128
    // bits, rounded with the mode held in vxrm (read the same way as in
    // vssrl_vv / vssra_vv) and then shifted right by one.
    0x8: vaaddu_vv({{
        __uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i];
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vu[i] = res >> 1;
    }}, OPMVV, VectorIntegerArithOp);
    0x9: vaadd_vv({{
        __uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i];
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vi[i] = res >> 1;
    }}, OPMVV, VectorIntegerArithOp);
    0xa: vasubu_vv({{
        __uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i];
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vu[i] = res >> 1;
    }}, OPMVV, VectorIntegerArithOp);
    0xb: vasub_vv({{
        __uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i];
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vi[i] = res >> 1;
    }}, OPMVV, VectorIntegerArithOp);
}
// VWXUNARY0
0x10: decode VS1 {
0x00: decode VM {
// The encodings corresponding to the masked versions
// (vm=0) of vmv.x.s are reserved.
// vmv.x.s: copy element 0 of Vs2 (signed view) to Rd.
0x1: VectorNonSplitFormat::vmv_x_s({{
Rd_ud = Vs2_vi[0];
}}, OPMVV, VectorMiscOp);
}
// vcpop.m: count the set mask bits of Vs2 among the first vl elements;
// when masked (vm == 0) only positions whose v0 bit is set are counted.
0x10: Vector1Vs1RdMaskFormat::vcpop_m({{
uint64_t popcount = 0;
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
bool vs2_lsb = elem_mask(Vs2_vu, i);
if(this->vm){
popcount += vs2_lsb;
}else{
bool do_mask = elem_mask(v0, i);
popcount += (vs2_lsb && do_mask);
}
}
Rd_vu = popcount;
}}, OPMVV, VectorMiscOp);
// vfirst.m: index of the first set mask bit of Vs2 among the first vl
// elements, skipping positions whose v0 bit is clear when masked;
// pos stays -1 when no bit is found.
// NOTE(review): confirm that writing -1 through Rd_vu yields an
// all-ones Rd for every width of vu.
0x11: Vector1Vs1RdMaskFormat::vfirst_m({{
int64_t pos = -1;
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
if(this->vm == 0){
if(elem_mask(v0, i)==0){
continue;
}
}
bool vs2_lsb = elem_mask(Vs2_vu, i);
if (vs2_lsb) {
pos = i;
break;
}
}
Rd_vu = pos;
}}, OPMVV, VectorMiscOp);
}
// Integer extension instructions selected by the VS1 field. Each
// snippet reads the narrow source element at i + (vlen / SEW) * index
// through the vextu (zero-extend entries) or vext (sign-extend entries)
// view of Vs2 and stores it in the SEW-wide destination. The vf8, vf4
// and vf2 entries share the same snippet text.
0x12: decode VS1 {
format VectorIntExtFormat {
0x02: vzext_vf8({{
auto offset = (vlen / SEW) * index;
Vd_vu[i] = Vs2_vextu[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x03: vsext_vf8({{
auto offset = (vlen / SEW) * index;
Vd_vi[i] = Vs2_vext[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x04: vzext_vf4({{
auto offset = (vlen / SEW) * index;
Vd_vu[i] = Vs2_vextu[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x05: vsext_vf4({{
auto offset = (vlen / SEW) * index;
Vd_vi[i] = Vs2_vext[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x06: vzext_vf2({{
auto offset = (vlen / SEW) * index;
Vd_vu[i] = Vs2_vextu[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
0x07: vsext_vf2({{
auto offset = (vlen / SEW) * index;
Vd_vi[i] = Vs2_vext[i + offset];
}}, OPMVV, VectorIntegerExtensionOp);
}
}
0x14: decode VS1 {
// vmsbf.m (set-before-first): every active destination bit before the
// first active set bit of Vs2 becomes 1; that position and all later
// active positions become 0. Inactive positions are left untouched and
// do not affect the search.
0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{
    bool seen_set_bit = false;
    for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
        const bool src_bit = elem_mask(Vs2_vu, i);
        if (!this->vm && !elem_mask(v0, i)) {
            continue;
        }
        // The first set bit itself already produces a 0.
        seen_set_bit = seen_set_bit || src_bit;
        const uint64_t res = seen_set_bit ? 0 : 1;
        Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
    }
}}, OPMVV, VectorMiscOp);
// vmsof.m (set-only-first): only the first active set bit of Vs2
// produces a 1; every other active position produces 0. Inactive
// positions are left untouched and do not affect the search.
0x02: Vector1Vs1VdMaskFormat::vmsof_m({{
    bool seen_set_bit = false;
    for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
        const bool src_bit = elem_mask(Vs2_vu, i);
        if (!this->vm && !elem_mask(v0, i)) {
            continue;
        }
        const uint64_t res = (!seen_set_bit && src_bit) ? 1 : 0;
        seen_set_bit = seen_set_bit || src_bit;
        Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
    }
}}, OPMVV, VectorMiscOp);
// vmsif.m (set-including-first): every active destination bit up to and
// including the first active set bit of Vs2 becomes 1; later active
// positions become 0. Inactive positions are left untouched and do not
// affect the search.
0x03: Vector1Vs1VdMaskFormat::vmsif_m({{
    bool seen_set_bit = false;
    for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
        const bool src_bit = elem_mask(Vs2_vu, i);
        if (!this->vm && !elem_mask(v0, i)) {
            continue;
        }
        // Decide the output before recording this position's bit so
        // that the first set bit still yields a 1.
        const uint64_t res = seen_set_bit ? 0 : 1;
        seen_set_bit = seen_set_bit || src_bit;
        Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
    }
}}, OPMVV, VectorMiscOp);
// viota.m: each active destination element receives the running count
// (*cnt) of set Vs2 mask bits seen at earlier active positions.
0x10: ViotaFormat::viota_m({{
// Read source operand 2 into a local container and view it as vu so
// it can be passed to elem_mask.
RiscvISAInst::VecRegContainer tmp_s2;
xc->getRegOperand(this, 2,
&tmp_s2);
auto Vs2bit = tmp_s2.as<vu>();
for (uint32_t i = 0; i < this->microVl; i++) {
// Element index across the register group for this micro-op.
uint32_t ei = i +
vtype_VLMAX(vtype, vlen, true) *
this->microIdx;
bool vs2_lsb = elem_mask(Vs2bit, ei);
bool do_mask = elem_mask(v0, ei);
bool has_one = false;
// Only active elements (unmasked, or v0 bit set) contribute.
if (this->vm || (do_mask && !this->vm)) {
if (vs2_lsb) {
has_one = true;
}
}
// Inactive elements keep their previous destination value.
bool use_ori = (!this->vm) && !do_mask;
if(use_ori == false){
Vd_vu[i] = *cnt;
}
// The counter is bumped after the write, so an element never
// counts its own bit.
if (has_one) {
*cnt = *cnt+1;
}
}
}}, OPMVV, VectorMiscOp);
// vid.v: write the element index ei to each destination element.
0x11: VectorIntFormat::vid_v({{
Vd_vu[i] = ei;
}}, OPMVV, VectorMiscOp);
}
format VectorMaskFormat {
// Mask-register logical operations: bit i of Vd is computed from bit i
// of Vs2 and Vs1 (read with elem_mask) and stored via ASSIGN_VD_BIT.
// The "n" suffix forms negate the Vs1 bit; vmnand/vmnor/vmxnor negate
// the combined result.
0x18: vmandn_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i));
}}, OPMVV, VectorMiscOp);
0x19: vmand_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i));
}}, OPMVV, VectorMiscOp);
0x1a: vmor_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i));
}}, OPMVV, VectorMiscOp);
0x1b: vmxor_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i));
}}, OPMVV, VectorMiscOp);
0x1c: vmorn_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i));
}}, OPMVV, VectorMiscOp);
0x1d: vmnand_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
!(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)));
}}, OPMVV, VectorMiscOp);
0x1e: vmnor_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
!(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)));
}}, OPMVV, VectorMiscOp);
0x1f: vmxnor_mm({{
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
!(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)));
}}, OPMVV, VectorMiscOp);
}
format VectorIntFormat {
// Division and remainder are delegated to the divu/div/remu/rem
// helpers with Vs2 as first and Vs1 as second argument.
0x20: vdivu_vv({{
Vd_vu[i] = divu<vu>(Vs2_vu[i], Vs1_vu[i]);
}}, OPMVV, VectorIntegerArithOp);
0x21: vdiv_vv({{
Vd_vi[i] = div<vi>(Vs2_vi[i], Vs1_vi[i]);
}}, OPMVV, VectorIntegerArithOp);
0x22: vremu_vv({{
Vd_vu[i] = remu<vu>(Vs2_vu[i], Vs1_vu[i]);
}}, OPMVV, VectorIntegerArithOp);
0x23: vrem_vv({{
Vd_vi[i] = rem<vi>(Vs2_vi[i], Vs1_vi[i]);
}}, OPMVV, VectorIntegerArithOp);
// Multiplies: vmul keeps the SEW-wide product; the mulh* entries call
// the mulhu/mulhsu/mulh helpers.
0x24: vmulhu_vv({{
Vd_vu[i] = mulhu<vu>(Vs2_vu[i], Vs1_vu[i]);
}}, OPMVV, VectorIntegerArithOp);
0x25: vmul_vv({{
Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i];
}}, OPMVV, VectorIntegerArithOp);
// vmulhsu.vv: Vs2 is read signed, Vs1 unsigned.
0x26: vmulhsu_vv({{
Vd_vi[i] = mulhsu<vi>(Vs2_vi[i], Vs1_vu[i]);
}}, OPMVV, VectorIntegerArithOp);
0x27: vmulh_vv({{
Vd_vi[i] = mulh<vi>(Vs2_vi[i], Vs1_vi[i]);
}}, OPMVV, VectorIntegerArithOp);
// Multiply-add: vmadd/vnmsub multiply Vs3 by Vs1 and add Vs2;
// vmacc/vnmsac multiply Vs2 by Vs1 and add Vs3. The "n" forms negate
// the product.
0x29: vmadd_vv({{
Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i];
}}, OPMVV, VectorIntegerArithOp);
0x2b: vnmsub_vv({{
Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i];
}}, OPMVV, VectorIntegerArithOp);
0x2d: vmacc_vv({{
Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i];
}}, OPMVV, VectorIntegerArithOp);
0x2f: vnmsac_vv({{
Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i];
}}, OPMVV, VectorIntegerArithOp);
}
format VectorIntWideningFormat {
// Widening integer arithmetic: SEW-wide sources are read at i + offset
// and converted to the wide type (vwu / vwi) before the operation; the
// result is stored in the wide view of Vd at index i.
0x30: vwaddu_vv({{
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
+ vwu(Vs1_vu[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x31: vwadd_vv({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
+ vwi(Vs1_vi[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x32: vwsubu_vv({{
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
- vwu(Vs1_vu[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x33: vwsub_vv({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
- vwi(Vs1_vi[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
// The ".wv" forms take Vs2 already in the wide format (index i) and
// only widen Vs1.
0x34: vwaddu_wv({{
Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x35: vwadd_wv({{
Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x36: vwsubu_wv({{
Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x37: vwsub_wv({{
Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
// Widening multiplies; vwmulsu reads Vs2 signed and Vs1 unsigned.
0x38: vwmulu_vv({{
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
* vwu(Vs1_vu[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x3a: vwmulsu_vv({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
* vwu(Vs1_vu[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
0x3b: vwmul_vv({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
* vwi(Vs1_vi[i + offset]);
}}, OPMVV, VectorIntegerArithOp);
// Widening multiply-accumulate: the widened product is added to the
// wide Vs3 element; vwmaccsu reads Vs1 signed and Vs2 unsigned.
0x3c: vwmaccu_vv({{
Vd_vwu[i] = vwu(Vs1_vu[i + offset])
* vwu(Vs2_vu[i + offset])
+ Vs3_vwu[i];
}}, OPMVV, VectorIntegerArithOp);
0x3d: vwmacc_vv({{
Vd_vwi[i] = vwi(Vs1_vi[i + offset])
* vwi(Vs2_vi[i + offset])
+ Vs3_vwi[i];
}}, OPMVV, VectorIntegerArithOp);
0x3f: vwmaccsu_vv({{
Vd_vwi[i] = vwi(Vs1_vi[i + offset])
* vwu(Vs2_vu[i + offset])
+ Vs3_vwi[i];
}}, OPMVV, VectorIntegerArithOp);
}
}
// OPIVI
0x3: decode VFUNCT6 {
format VectorIntFormat {
// Vector-immediate integer operations: the 5-bit SIMM5 field is
// sign-extended with sext<5>() and cast to the signed element type.
0x00: vadd_vi({{
Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5);
}}, OPIVI, VectorIntegerArithOp);
// vrsub.vi: reverse subtract, immediate minus Vs2.
0x03: vrsub_vi({{
Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i];
}}, OPIVI, VectorIntegerArithOp);
0x09: vand_vi({{
Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5);
}}, OPIVI, VectorIntegerArithOp);
0x0a: vor_vi({{
Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5);
}}, OPIVI, VectorIntegerArithOp);
0x0b: vxor_vi({{
Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5);
}}, OPIVI, VectorIntegerArithOp);
}
// vrgather.vi: every active destination element receives the Vs2
// element selected by the immediate index (or 0 when the index is
// >= vlmax).
0x0c: VectorGatherFormat::vrgather_vi({{
    // The index of vrgather.vi is an unsigned immediate, so the raw
    // 5-bit field is used as-is. Sign-extending it would turn the
    // indices 16..31 into huge values that are always >= vlmax.
    const uint64_t uimm = (uint64_t)SIMM5;
    for (uint32_t i = 0; i < microVl; i++) {
        // Element index used for the v0 mask lookup.
        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
        if (this->vm || elem_mask(v0, ei)) {
            // Index rebased to the first element covered by vs2_idx.
            const uint64_t idx = uimm - vs2_elems * vs2_idx;
            // Indices outside the vs2_elems window take Vs3_vu[i].
            Vd_vu[i] = (uimm >= vlmax) ? 0
                : (idx < vs2_elems) ? Vs2_vu[idx]
                : Vs3_vu[i];
        }
    }
}}, OPIVI, VectorMiscOp);
// vslideup.vi: copy Vs2 elements to Vd positions that are `offset`
// elements higher. This snippet handles one (vdIdx, vs2Idx) register
// pair of the group.
0x0e: VectorSlideUpFormat::vslideup_vi({{
// The immediate is used without sign extension.
const int offset = (int)(uint64_t)(SIMM5);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
// Slide distance left over once the whole-register distance between
// destination and source register is accounted for.
const int vregOffset = vdIdx - vs2Idx;
const int offsetInVreg = offset - vregOffset * microVlmax;
// Nothing is copied unless the two registers overlap after the slide.
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
// Number of overlapping elements.
const int upperBound = (offsetInVreg >= 0)
? microVlmax - offsetInVreg
: microVlmax + offsetInVreg;
// First destination / source element of the overlap.
const int vdOffset = (offsetInVreg >= 0)
? offsetInVreg
: 0;
const int vs2Offset = (offsetInVreg >= 0)
? 0
: -offsetInVreg;
// Index of the first written element, used for the v0 mask lookup.
const int elemOffset = vdOffset + vdIdx * microVlmax;
for (int i = 0;
i < upperBound && i + vdOffset < microVl;
i++) {
if (this->vm || elem_mask(v0, i + elemOffset)) {
Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
}
}
}
}}, OPIVI, VectorMiscOp);
// vslidedown.vi: copy Vs2 elements to Vd positions that are `offset`
// elements lower. This snippet handles one (vdIdx, vs2Idx) register
// pair of the group.
0x0f: VectorSlideDownFormat::vslidedown_vi({{
// The immediate is used without sign extension.
const int offset = (int)(uint64_t)(SIMM5);
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
// Slide distance left over once the whole-register distance between
// source and destination register is accounted for.
const int vregOffset = vs2Idx - vdIdx;
const int offsetInVreg = offset - vregOffset * microVlmax;
const int numVs2s = vtype_regs_per_group(vtype);
// Nothing is copied unless the two registers overlap after the slide.
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
// Only the last source register of the group zero-fills the
// elements past the overlap.
const bool needZeroTail = numVs2s == vs2Idx + 1;
const int upperBound = (offsetInVreg >= 0)
? microVlmax - offsetInVreg
: microVlmax + offsetInVreg;
const int vdOffset = (offsetInVreg >= 0)
? 0
: -offsetInVreg;
const int vs2Offset = (offsetInVreg >= 0)
? offsetInVreg
: 0;
const int elemIdxBase = vdIdx * microVlmax;
// Build the result in a scratch register first, then commit it to
// Vd under the mask.
vreg_t resVreg;
auto res = resVreg.as<vu>();
for (int i = 0;
i < upperBound && i + vdOffset < microVl;
i++) {
res[i + vdOffset] = Vs2_vu[i + vs2Offset];
}
if (needZeroTail) {
for (int i = upperBound + vdOffset;
i < microVlmax; i++) {
res[i] = 0;
}
}
for (int i = vdOffset; i < microVl ; i++) {
if (vm || elem_mask(v0, i + elemIdxBase)) {
Vd_vu[i] = res[i];
}
}
}
}}, OPIVI, VectorMiscOp);
format VectorIntFormat {
0x10: decode VM {
// vadc.vim: Vs2 + sign-extended immediate + the v0 mask bit of
// element ei.
0x0: vadc_vim({{
Vd_vi[i] = Vs2_vi[i] +
(vi)sext<5>(SIMM5) + elem_mask(v0, ei);
}}, OPIVI, VectorIntegerArithOp);
// the unmasked versions (vm=1) are reserved
}
0x17: decode VM {
// vmerge.vim: take the sign-extended immediate where the v0 mask bit
// is set, else Vs2[i].
0x0: vmerge_vim({{
Vd_vi[i] = elem_mask(v0, ei)
? (vi)sext<5>(SIMM5)
: Vs2_vi[i];
}}, OPIVI, VectorIntegerArithOp);
// vmv.v.i: write the sign-extended immediate to every element.
0x1: vmv_v_i({{
Vd_vi[i] = (vi)sext<5>(SIMM5);
}}, OPIVI, VectorIntegerArithOp);
}
}
format VectorIntVxsatFormat{
// Saturating add with immediate: the sign-extended SIMM5 is cast to
// the element type and handed to the sat_addu / sat_add helper together
// with vxsatptr.
0x20: vsaddu_vi({{
Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], (vu)sext<5>(SIMM5),
vxsatptr);
}}, OPIVI, VectorIntegerArithOp);
0x21: vsadd_vi({{
Vd_vi[i] = sat_add<vi>(Vs2_vi[i], (vi)sext<5>(SIMM5),
vxsatptr);
}}, OPIVI, VectorIntegerArithOp);
}
format VectorIntFormat {
    // Immediate shifts: the shift amount is the raw SIMM5 field masked
    // with (sew - 1) and 0x1f.
    0x25: vsll_vi({{
        Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f);
    }}, OPIVI, VectorIntegerArithOp);
    0x28: vsrl_vi({{
        Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
    }}, OPIVI, VectorIntegerArithOp);
    // Scaling shifts: round with the mode held in vxrm, then shift.
    0x2a: vssrl_vi({{
        int sh = SIMM5 & (vtype_SEW(vtype) - 1);
        __uint128_t res = Vs2_vu[i];
        // Read vxrm as vssra_vi / vssrl_vv do, instead of always using
        // rounding mode 0.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), sh) >> sh;
        Vd_vu[i] = res;
    }}, OPIVI, VectorIntegerArithOp);
    // vsra.vi shifts the signed (vi) view of Vs2.
    0x29: vsra_vi({{
        Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
    }}, OPIVI, VectorIntegerArithOp);
    0x2b: vssra_vi({{
        int sh = SIMM5 & (sew - 1);
        __int128_t val = Vs2_vi[i];
        val = int_rounding<__int128_t>(val,
            xc->readMiscReg(MISCREG_VXRM), sh);
        Vd_vi[i] = val >> sh;
    }}, OPIVI, VectorIntegerArithOp);
}
// According to Spec Section 16.6,
// vm must be 1 (unmasked) in vmv<nr>r.v instructions.
// SIMM3 selects the variant (0x0, 0x1, 0x3, 0x7 for 1, 2, 4 and 8
// registers). All four share the same snippet, which copies 64-bit
// chunks from Vs2 to Vd.
0x27: decode VM { 0x1: decode SIMM3 {
format VMvWholeFormat {
0x0: vmv1r_v({{
Vd_ud[i] = Vs2_ud[i];
}}, OPIVI, VectorMiscOp);
0x1: vmv2r_v({{
Vd_ud[i] = Vs2_ud[i];
}}, OPIVI, VectorMiscOp);
0x3: vmv4r_v({{
Vd_ud[i] = Vs2_ud[i];
}}, OPIVI, VectorMiscOp);
0x7: vmv8r_v({{
Vd_ud[i] = Vs2_ud[i];
}}, OPIVI, VectorMiscOp);
}
}}
format VectorIntMaskFormat {
// Mask-producing vector-immediate instructions: each result bit is
// stored at bit position i + offset of Vd via ASSIGN_VD_BIT. The
// immediate is always sign-extended with sext<5>() and then cast to the
// element type used for the comparison.
0x11: decode VM {
// vmadc.vim: carry_out of Vs2 + immediate with the v0 mask bit as
// third argument.
0x0: vmadc_vim({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5),
elem_mask(v0, ei)));
}}, OPIVI, VectorIntegerArithOp);
// vmadc.vi: same, without the third argument.
0x1: vmadc_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
}
0x18: vmseq_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] == (vi)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
0x19: vmsne_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] != (vi)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
// Unsigned comparisons cast the sign-extended immediate to vu.
0x1c: vmsleu_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] <= (vu)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
0x1d: vmsle_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] <= (vi)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
0x1e: vmsgtu_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] > (vu)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
0x1f: vmsgt_vi({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] > (vi)sext<5>(SIMM5)));
}}, OPIVI, VectorIntegerArithOp);
}
format VectorIntNarrowingFormat {
    // Narrowing shifts by immediate: the 2*SEW-wide Vs2 element is
    // shifted by the raw SIMM5 field masked with (2 * sew - 1), then
    // truncated to SEW.
    0x2c: vnsrl_wi({{
        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
                            ((vwu)SIMM5 & (sew * 2 - 1)));
    }}, OPIVI, VectorIntegerArithOp);
    0x2d: vnsra_wi({{
        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
                            ((vwu)SIMM5 & (sew * 2 - 1)));
    }}, OPIVI, VectorIntegerArithOp);
    // Narrowing clips by immediate: round with the mode held in vxrm,
    // shift, then saturate to the SEW-wide range. The shift amount is
    // taken from the VS1 field.
    0x2e: vnclipu_wi({{
        vu max = std::numeric_limits<vu>::max();
        // All bits at or above position sew; any of them set after the
        // shift means the value does not fit in vu.
        uint64_t sign_mask =
            std::numeric_limits<uint64_t>::max() << sew;
        __uint128_t res = Vs2_vwu[i];
        unsigned shift = VS1 & ((sew * 2) - 1);
        // Use the vxrm rounding mode (as vssra_vi does) rather than a
        // hard-coded mode 0.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
        if (res & sign_mask) {
            // TODO: vxsat
            res = max;
        }
        Vd_vu[i + offset] = (vu)res;
    }}, OPIVI, VectorIntegerArithOp);
    0x2f: vnclip_wi({{
        vi max = std::numeric_limits<vi>::max();
        vi min = std::numeric_limits<vi>::min();
        __int128_t res = Vs2_vwi[i];
        unsigned shift = VS1 & ((sew * 2) - 1);
        // Use the vxrm rounding mode (as vssra_vi does) rather than a
        // hard-coded mode 0.
        res = int_rounding<__int128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
        if (res < min) {
            res = min;
            // TODO: vxsat
        } else if (res > max) {
            res = max;
            // TODO: vxsat
        }
        Vd_vi[i + offset] = (vi)res;
    }}, OPIVI, VectorIntegerArithOp);
}
}
// OPIVX
0x4: decode VFUNCT6 {
format VectorIntFormat {
// Vector-scalar integer operations: the scalar operand is Rs1 read
// through the element-typed view (Rs1_vu / Rs1_vi).
0x0: vadd_vx({{
Vd_vu[i] = Vs2_vu[i] + Rs1_vu;
}}, OPIVX, VectorIntegerArithOp);
0x2: vsub_vx({{
Vd_vu[i] = Vs2_vu[i] - Rs1_vu;
}}, OPIVX, VectorIntegerArithOp);
// vrsub.vx: reverse subtract, scalar minus Vs2.
0x3: vrsub_vx({{
Vd_vu[i] = Rs1_vu - Vs2_vu[i];
}}, OPIVX, VectorIntegerArithOp);
// Min/max on the unsigned (vu) or signed (vi) view.
0x4: vminu_vx({{
Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu);
}}, OPIVX, VectorIntegerArithOp);
0x5: vmin_vx({{
Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi);
}}, OPIVX, VectorIntegerArithOp);
0x6: vmaxu_vx({{
Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu);
}}, OPIVX, VectorIntegerArithOp);
0x7: vmax_vx({{
Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi);
}}, OPIVX, VectorIntegerArithOp);
// Bitwise logic.
0x9: vand_vx({{
Vd_vu[i] = Vs2_vu[i] & Rs1_vu;
}}, OPIVX, VectorIntegerArithOp);
0xa: vor_vx({{
Vd_vu[i] = Vs2_vu[i] | Rs1_vu;
}}, OPIVX, VectorIntegerArithOp);
0xb: vxor_vx({{
Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu;
}}, OPIVX, VectorIntegerArithOp);
}
// vslideup.vx: copy Vs2 elements to Vd positions that are `offset`
// elements higher, with the offset taken from Rs1. This snippet handles
// one (vdIdx, vs2Idx) register pair of the group.
0x0e: VectorSlideUpFormat::vslideup_vx({{
// NOTE(review): the offset is narrowed to int from Rs1_vu; confirm the
// behaviour for offsets that do not fit.
const int offset = (int)Rs1_vu;
const int microVlmax = vtype_VLMAX(machInst.vtype8,
vlen, true);
// Slide distance left over once the whole-register distance between
// destination and source register is accounted for.
const int vregOffset = vdIdx - vs2Idx;
const int offsetInVreg = offset - vregOffset * microVlmax;
// Nothing is copied unless the two registers overlap after the slide.
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
// Number of overlapping elements.
const int upperBound = (offsetInVreg >= 0)
? microVlmax - offsetInVreg
: microVlmax + offsetInVreg;
// First destination / source element of the overlap.
const int vdOffset = (offsetInVreg >= 0)
? offsetInVreg
: 0;
const int vs2Offset = (offsetInVreg >= 0)
? 0
: -offsetInVreg;
// Index of the first written element, used for the v0 mask lookup.
const int elemOffset = vdOffset + vdIdx * microVlmax;
for (int i = 0;
i < upperBound && i + vdOffset < microVl;
i++) {
if (this->vm || elem_mask(v0, i + elemOffset)) {
Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
}
}
}
}}, OPIVX, VectorMiscOp);
// vslidedown.vx micro-op body: stages the contribution of one source
// register in a temporary, zero-fills the remainder when this is the last
// source register of the group, then commits the staged elements to the
// destination under the mask.
0x0f: VectorSlideDownFormat::vslidedown_vx({{
    const int slide = (int)Rs1_vu;
    const int elemsPerReg = vtype_VLMAX(machInst.vtype8,
        vlen, true);
    const int regDelta = vs2Idx - vdIdx;
    // Shift that remains inside this (source, destination) register pair.
    const int localShift = slide - regDelta * elemsPerReg;
    const int groupRegs = vtype_regs_per_group(vtype);
    if (std::abs(localShift) < uint32_t(elemsPerReg)) {
        const bool lastSource = groupRegs == vs2Idx + 1;
        int count;
        int dstStart;
        int srcStart;
        if (localShift >= 0) {
            count = elemsPerReg - localShift;
            dstStart = 0;
            srcStart = localShift;
        } else {
            count = elemsPerReg + localShift;
            dstStart = -localShift;
            srcStart = 0;
        }
        const int maskBase = vdIdx * elemsPerReg;
        vreg_t stagedReg;
        auto staged = stagedReg.as<vu>();
        for (int k = 0;
                k < count && k + dstStart < microVl;
                ++k) {
            staged[k + dstStart] = Vs2_vu[k + srcStart];
        }
        if (lastSource) {
            // Positions past the end of the source group read as zero.
            for (int k = count + dstStart; k < elemsPerReg; ++k) {
                staged[k] = 0;
            }
        }
        for (int k = dstStart; k < microVl; ++k) {
            if (!vm && !elem_mask(v0, k + maskBase)) {
                continue;
            }
            Vd_vu[k] = staged[k];
        }
    }
}}, OPIVX, VectorMiscOp);
// vrgather.vx micro-op body: every active element receives the source
// element selected by the scalar index. The result is zero when the index
// is at or beyond vlmax, and the old destination value is kept when the
// index falls outside the source register handled by this micro-op.
0x0c: VectorGatherFormat::vrgather_vx({{
    for (uint32_t lane = 0; lane < microVl; ++lane) {
        const uint32_t maskPos = lane + vs1_idx * vs1_elems + vs1_bias;
        if (!this->vm && !elem_mask(v0, maskPos)) {
            continue;
        }
        // Index relative to the start of this source register.
        const uint64_t srcPos = Rs1_vu - vs2_elems * vs2_idx;
        if (Rs1_vu >= vlmax) {
            Vd_vu[lane] = 0;
        } else if (srcPos < vs2_elems) {
            Vd_vu[lane] = Vs2_vu[srcPos];
        } else {
            Vd_vu[lane] = Vs3_vu[lane];
        }
    }
}}, OPIVX, VectorMiscOp);
// Carry/borrow arithmetic and merge/move with a scalar operand (OPIVX).
// These entries are further decoded on the VM bit; the mask bit for the
// current element is read with elem_mask(v0, ei).
format VectorIntFormat {
0x10: decode VM {
// Add with carry-in: vd[i] = vs2[i] + scalar + mask bit
0x0: vadc_vxm({{
Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei);
}}, OPIVX, VectorIntegerArithOp);
// the unmasked versions (vm=1) are reserved
}
0x12: decode VM {
// Subtract with borrow-in: vd[i] = vs2[i] - scalar - mask bit
0x0: vsbc_vxm({{
Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei);
}}, OPIVX, VectorIntegerArithOp);
// the unmasked versions (vm=1) are reserved
}
0x17: decode VM {
// Merge: pick the scalar where the mask bit is set, vs2[i] otherwise
0x0: vmerge_vxm({{
Vd_vu[i] = elem_mask(v0, ei) ? Rs1_vu : Vs2_vu[i];
}}, OPIVX, VectorIntegerArithOp);
// With vm=1 only the encoding with the VS2 field equal to 0 is decoded
// here: it broadcasts the scalar to every element.
0x1: decode VS2 {
0x0: vmv_v_x({{
Vd_vu[i] = Rs1_vu;
}}, OPIVX, VectorIntegerArithOp);
}
}
}
// Saturating arithmetic with a scalar operand (OPIVX). The helpers receive
// vxsatptr so that saturation can be recorded.
format VectorIntVxsatFormat{
    // Saturating unsigned add
    0x20: vsaddu_vx({{
        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Rs1_vu,
            vxsatptr);
    }}, OPIVX, VectorIntegerArithOp);
    // Saturating signed add
    0x21: vsadd_vx({{
        Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Rs1_vu,
            vxsatptr);
    }}, OPIVX, VectorIntegerArithOp);
    // Saturating unsigned subtract
    0x22: vssubu_vx({{
        Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Rs1_vu,
            vxsatptr);
    }}, OPIVX, VectorIntegerArithOp);
    // Saturating signed subtract
    0x23: vssub_vx({{
        Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Rs1_vu,
            vxsatptr);
    }}, OPIVX, VectorIntegerArithOp);
    // Signed fractional multiply: the double-width product is rounded and
    // shifted right by SEW - 1. The only overflowing case (both inputs
    // equal to the minimum value) saturates to the maximum and sets vxsat.
    0x27: vsmul_vx({{
        vi max = std::numeric_limits<vi>::max();
        vi min = std::numeric_limits<vi>::min();
        bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min;
        __int128_t result =
            (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i];
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        result = int_rounding<__uint128_t>(
            result, xc->readMiscReg(MISCREG_VXRM), sew - 1);
        result = result >> (sew - 1);
        if (overflow) {
            result = max;
            *vxsatptr = true;
        }
        Vd_vi[i] = (vi)result;
    }}, OPIVX, VectorIntegerArithOp);
}
// Single-width shifts with a scalar shift amount (OPIVX). The shift amount
// is the scalar masked with (sew - 1).
format VectorIntFormat {
// Logical left shift
0x25: vsll_vx({{
Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1));
}}, OPIVX, VectorIntegerArithOp);
// Logical right shift
0x28: vsrl_vx({{
Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1));
}}, OPIVX, VectorIntegerArithOp);
// Arithmetic right shift (signed source view)
0x29: vsra_vx({{
Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1));
}}, OPIVX, VectorIntegerArithOp);
// Scaling logical right shift: the value is rounded by int_rounding using
// the rounding mode read from MISCREG_VXRM before being shifted.
0x2a: vssrl_vx({{
int sh = Rs1_vu & (sew - 1);
__uint128_t val = Vs2_vu[i];
val = int_rounding<__uint128_t>(val,
xc->readMiscReg(MISCREG_VXRM), sh);
Vd_vu[i] = val >> sh;
}}, OPIVX, VectorIntegerArithOp);
// Scaling arithmetic right shift: same rounding step on a signed value.
0x2b: vssra_vx({{
int sh = Rs1_vu & (sew - 1);
__int128_t val = Vs2_vi[i];
val = int_rounding<__int128_t>(val,
xc->readMiscReg(MISCREG_VXRM), sh);
Vd_vi[i] = val >> sh;
}}, OPIVX, VectorIntegerArithOp);
}
// Narrowing right shifts with a scalar shift amount (OPIVX).
// Each entry reads a 2*SEW-wide element i of vs2, shifts it right by the
// scalar masked to (2*SEW - 1), and stores a SEW-wide result at element
// i + offset of vd.
format VectorIntNarrowingFormat {
    // vnsrl.wx: logical shift, result truncated to SEW bits.
    0x2c: vnsrl_wx({{
        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
                            ((vwu)Rs1_vu & (sew * 2 - 1)));
    }}, OPIVX, VectorIntegerArithOp);
    // vnsra.wx: arithmetic shift, result truncated to SEW bits.
    0x2d: vnsra_wx({{
        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
                            ((vwu)Rs1_vu & (sew * 2 - 1)));
    }}, OPIVX, VectorIntegerArithOp);
    // vnclipu.wx: unsigned rounding shift, saturated to the SEW-wide
    // unsigned maximum when any bit at or above position SEW is set.
    0x2e: vnclipu_wx({{
        vu max = std::numeric_limits<vu>::max();
        uint64_t sign_mask =
            std::numeric_limits<uint64_t>::max() << sew;
        __uint128_t res = Vs2_vwu[i];
        unsigned shift = Rs1_vu & ((sew * 2) - 1);
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
        if (res & sign_mask) {
            // TODO: vxsat
            res = max;
        }
        Vd_vu[i + offset] = (vu)res;
    }}, OPIVX, VectorIntegerArithOp);
    // vnclip.wx: signed rounding shift, clamped to the SEW-wide signed
    // range.
    0x2f: vnclip_wx({{
        vi max = std::numeric_limits<vi>::max();
        vi min = std::numeric_limits<vi>::min();
        __int128_t res = Vs2_vwi[i];
        unsigned shift = Rs1_vi & ((sew * 2) - 1);
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        res = int_rounding<__int128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), shift) >> shift;
        if (res < min) {
            res = min;
            // TODO: vxsat
        } else if (res > max) {
            res = max;
            // TODO: vxsat
        }
        Vd_vi[i + offset] = (vi)res;
    }}, OPIVX, VectorIntegerArithOp);
}
// Mask-producing integer operations with a scalar operand (OPIVX).
// Each entry computes one boolean per element and stores it through
// ASSIGN_VD_BIT into bit (i + offset) of the destination, addressed as the
// byte array Vd_ub.
format VectorIntMaskFormat {
0x11: decode VM {
// Carry-out of vs2[i] + scalar + mask bit
0x0: vmadc_vxm({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
carry_out(Vs2_vi[i], Rs1_vi,
elem_mask(v0, ei)));
}}, OPIVX, VectorIntegerArithOp);
// Carry-out of vs2[i] + scalar (no carry-in)
0x1: vmadc_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
carry_out(Vs2_vi[i], Rs1_vi));
}}, OPIVX, VectorIntegerArithOp);
}
0x13: decode VM {
// Borrow-out of vs2[i] - scalar - mask bit
0x0: vmsbc_vxm({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
borrow_out(Vs2_vi[i], Rs1_vi,
elem_mask(v0, ei)));
}}, OPIVX, VectorIntegerArithOp);
// Borrow-out of vs2[i] - scalar (no borrow-in)
0x1: vmsbc_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
borrow_out(Vs2_vi[i], Rs1_vi));
}}, OPIVX, VectorIntegerArithOp);
}
// Equal
0x18: vmseq_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] == Rs1_vu));
}}, OPIVX, VectorIntegerArithOp);
// Not equal
0x19: vmsne_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] != Rs1_vu));
}}, OPIVX, VectorIntegerArithOp);
// Unsigned less-than
0x1a: vmsltu_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] < Rs1_vu));
}}, OPIVX, VectorIntegerArithOp);
// Signed less-than
0x1b: vmslt_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] < Rs1_vi));
}}, OPIVX, VectorIntegerArithOp);
// Unsigned less-than-or-equal
0x1c: vmsleu_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] <= Rs1_vu));
}}, OPIVX, VectorIntegerArithOp);
// Signed less-than-or-equal
0x1d: vmsle_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] <= Rs1_vi));
}}, OPIVX, VectorIntegerArithOp);
// Unsigned greater-than
0x1e: vmsgtu_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vu[i] > Rs1_vu));
}}, OPIVX, VectorIntegerArithOp);
// Signed greater-than
0x1f: vmsgt_vx({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
(Vs2_vi[i] > Rs1_vi));
}}, OPIVX, VectorIntegerArithOp);
}
}
// OPFVF
0x5: decode VFUNCT6 {
// Single-width floating-point operations with a scalar operand (OPFVF).
// The vector element is wrapped with ftype<et>(), the scalar is obtained
// with ftype_freg<et>(freg(Fs1_bits)), and the raw bits (.v) of the result
// are written to element i of vd.
format VectorFloatFormat{
// vd[i] = vs2[i] + scalar
0x00: vfadd_vf({{
auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// vd[i] = vs2[i] - scalar
0x02: vfsub_vf({{
auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// Minimum of vs2[i] and scalar
0x04: vfmin_vf({{
auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// Maximum of vs2[i] and scalar
0x06: vfmax_vf({{
auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// Sign-injection family: the three entries differ only in the two boolean
// flags passed to fsgnj (false/false, true/false, false/true).
0x08: vfsgnj_vf({{
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)),
false, false).v;
}}, OPFVF, VectorFloatArithOp);
0x09: vfsgnjn_vf({{
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)),
true, false).v;
}}, OPFVF, VectorFloatArithOp);
0x0a: vfsgnjx_vf({{
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)),
false, true).v;
}}, OPFVF, VectorFloatArithOp);
}
// vfslide1up.vf micro-op body: slide-up by exactly one element. The scalar
// is placed in element 0 of tmp_d0 by the micro-op that pairs source
// register 0 with destination register 0.
0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{
    const int slide = 1;
    const int elemsPerReg = vtype_VLMAX(machInst.vtype8,
        vlen, true);
    const int regDelta = vdIdx - vs2Idx;
    // Shift that remains inside this (source, destination) register pair.
    const int localShift = slide - regDelta * elemsPerReg;
    if (std::abs(localShift) < uint32_t(elemsPerReg)) {
        int count;
        int dstStart;
        int srcStart;
        if (localShift >= 0) {
            count = elemsPerReg - localShift;
            dstStart = localShift;
            srcStart = 0;
        } else {
            count = elemsPerReg + localShift;
            dstStart = 0;
            srcStart = -localShift;
        }
        // Mask bit index of the first destination element written.
        const int maskBase = dstStart + vdIdx * elemsPerReg;
        for (int k = 0;
                k < count && k + dstStart < microVl;
                ++k) {
            if (!this->vm && !elem_mask(v0, k + maskBase)) {
                continue;
            }
            Vd_vu[k + dstStart] = Vs2_vu[k + srcStart];
        }
        // TODO: dirty code
        const bool firstPair = vdIdx == 0 && vs2Idx == 0;
        if (firstPair && (this->vm || elem_mask(v0, 0))) {
            tmp_d0.as<vu>()[0] = Rs1_vu;
        }
    }
}}, OPFVF, VectorMiscOp);
// vfslide1down.vf micro-op body: slide-down by exactly one element. The
// active element whose index equals machInst.vl - 1 receives the scalar
// instead of a slid value.
0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{
    const int slide = 1;
    const int elemsPerReg = vtype_VLMAX(machInst.vtype8,
        vlen, true);
    const int regDelta = vs2Idx - vdIdx;
    // Shift that remains inside this (source, destination) register pair.
    const int localShift = slide - regDelta * elemsPerReg;
    const int groupRegs = vtype_regs_per_group(vtype);
    if (std::abs(localShift) < uint32_t(elemsPerReg)) {
        const bool lastSource = groupRegs == vs2Idx + 1;
        int count;
        int dstStart;
        int srcStart;
        if (localShift >= 0) {
            count = elemsPerReg - localShift;
            dstStart = 0;
            srcStart = localShift;
        } else {
            count = elemsPerReg + localShift;
            dstStart = -localShift;
            srcStart = 0;
        }
        const int maskBase = vdIdx * elemsPerReg;
        vreg_t stagedReg;
        auto staged = stagedReg.as<vu>();
        for (int k = 0;
                k < count && k + dstStart < microVl;
                ++k) {
            staged[k + dstStart] = Vs2_vu[k + srcStart];
        }
        if (lastSource) {
            // Positions past the end of the source group read as zero.
            for (int k = count + dstStart; k < elemsPerReg; ++k) {
                staged[k] = 0;
            }
        }
        for (int k = dstStart; k < microVl; ++k) {
            if (!vm && !elem_mask(v0, k + maskBase)) {
                continue;
            }
            if (k + maskBase != machInst.vl - 1) {
                Vd_vu[k] = staged[k];
            } else {
                Vd_vu[k] = Rs1_vu;
            }
        }
    }
}}, OPFVF, VectorMiscOp);
// VRFUNARY0
// Only the encoding with the VS2 field equal to 0 and vm=1 is decoded here.
0x10: decode VS2 {
0x00: decode VM {
// The encodings corresponding to the masked versions
// (vm=0) of vfmv.s.f are reserved
// vfmv.s.f: writes the scalar floating-point value to element 0 of vd,
// and does nothing when vl is zero.
0x1: VectorNonSplitFormat::vfmv_s_f({{
if (this->vl) {
auto fd = ftype_freg<et>(freg(Fs1_bits));
Vd_vu[0] = fd.v;
}
}}, OPFVV, VectorMiscOp);
}
}
// Floating-point merge and scalar broadcast (OPFVF), selected by the VM bit.
format VectorFloatFormat{
0x17: decode VM {
// vfmerge.vfm: scalar where the mask bit is set, vs2[i] otherwise
0x0: vfmerge_vfm({{
Vd_vu[i] = elem_mask(v0, ei)
? ftype_freg<et>(freg(Fs1_bits)).v
: Vs2_vu[i];
}}, OPFVF, VectorFloatArithOp);
// vfmv.v.f: scalar copied to every element
0x1: vfmv_v_f({{
auto fd = ftype_freg<et>(freg(Fs1_bits));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
}
}
// Mask-producing floating-point comparisons against a scalar (OPFVF).
// The boolean result is stored through ASSIGN_VD_BIT into bit (i + offset)
// of the destination, addressed as the byte array Vd_ub.
format VectorFloatMaskFormat {
// vs2[i] == scalar
0x18: vmfeq_vf({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
feq<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits))));
}}, OPFVF, VectorFloatArithOp);
// vs2[i] <= scalar
0x19: vmfle_vf({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
fle<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits))));
}}, OPFVF, VectorFloatArithOp);
// vs2[i] < scalar
0x1b: vmflt_vf({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
flt<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits))));
}}, OPFVF, VectorFloatArithOp);
// vs2[i] != scalar, computed as the negation of feq
0x1c: vmfne_vf({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
!feq<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits))));
}}, OPFVF, VectorFloatArithOp);
// vs2[i] > scalar, computed as scalar < vs2[i] (operands swapped)
0x1d: vmfgt_vf({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
flt<et>(ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i])));
}}, OPFVF, VectorFloatArithOp);
// vs2[i] >= scalar, computed as scalar <= vs2[i] (operands swapped)
0x1f: vmfge_vf({{
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
fle<et>(ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i])));
}}, OPFVF, VectorFloatArithOp);
}
// Floating-point divide, multiply, reverse subtract and fused
// multiply-add with a scalar operand (OPFVF). In the fused entries Vs3
// supplies the old destination value; every variant is expressed as a
// single fmadd(a, b, c) call with fneg applied to the operands that must
// be negated.
format VectorFloatFormat{
// vd[i] = vs2[i] / scalar
0x20: vfdiv_vf({{
auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// Reverse divide: vd[i] = scalar / vs2[i]
0x21: vfrdiv_vf({{
auto fd = fdiv<et>(ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// vd[i] = vs2[i] * scalar
0x24: vfmul_vf({{
auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
ftype_freg<et>(freg(Fs1_bits)));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// Reverse subtract: vd[i] = scalar - vs2[i]
0x27: vfrsub_vf({{
auto fd = fsub<et>(ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(old vd, scalar, vs2)
0x28: vfmadd_vf({{
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(-old vd, scalar, -vs2)
0x29: vfnmadd_vf({{
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
ftype_freg<et>(freg(Fs1_bits)),
fneg(ftype<et>(Vs2_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(old vd, scalar, -vs2)
0x2a: vfmsub_vf({{
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
ftype_freg<et>(freg(Fs1_bits)),
fneg(ftype<et>(Vs2_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(-old vd, scalar, vs2)
0x2b: vfnmsub_vf({{
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(scalar, vs2, old vd)
0x2c: vfmacc_vf({{
auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i]),
ftype<et>(Vs3_vu[i]));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(-scalar, vs2, -old vd)
0x2d: vfnmacc_vf({{
auto fd = fmadd<et>(
fneg(ftype_freg<et>(freg(Fs1_bits))),
ftype<et>(Vs2_vu[i]),
fneg(ftype<et>(Vs3_vu[i]))
);
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(scalar, vs2, -old vd)
0x2e: vfmsac_vf({{
auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
ftype<et>(Vs2_vu[i]),
fneg(ftype<et>(Vs3_vu[i])));
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(-scalar, vs2, old vd)
0x2f: vfnmsac_vf({{
auto fd = fmadd<et>(
fneg(ftype_freg<et>(freg(Fs1_bits))),
ftype<et>(Vs2_vu[i]),
ftype<et>(Vs3_vu[i])
);
Vd_vu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
}
// Widening floating-point operations with a scalar operand (OPFVF).
// Narrow inputs are converted with fwiden() and the operation is carried
// out in the wide type ewt; the result is written to the wide destination
// view Vd_vwu[i]. Narrow vs2 inputs are read at element i + offset, while
// the _wf forms read an already-wide vs2 at element i.
format VectorFloatWideningFormat {
// wide(vs2) + wide(scalar)
0x30: vfwadd_vf({{
auto fd = fadd<ewt>(
fwiden(ftype<et>(Vs2_vu[i + offset])),
fwiden(ftype_freg<et>(freg(Fs1_bits))));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// wide(vs2) - wide(scalar)
0x32: vfwsub_vf({{
auto fd = fsub<ewt>(
fwiden(ftype<et>(Vs2_vu[i + offset])),
fwiden(ftype_freg<et>(freg(Fs1_bits))));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// already-wide vs2 + wide(scalar)
0x34: vfwadd_wf({{
auto fd = fadd<ewt>(
ftype<ewt>(Vs2_vwu[i]),
fwiden(ftype_freg<et>(freg(Fs1_bits))));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// already-wide vs2 - wide(scalar)
0x36: vfwsub_wf({{
auto fd = fsub<ewt>(
ftype<ewt>(Vs2_vwu[i]),
fwiden(ftype_freg<et>(freg(Fs1_bits))));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// wide(vs2) * wide(scalar)
0x38: vfwmul_vf({{
auto fd = fmul<ewt>(
fwiden(ftype<et>(Vs2_vu[i + offset])),
fwiden(ftype_freg<et>(freg(Fs1_bits))));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(wide(scalar), wide(vs2), old wide vd)
0x3c: vfwmacc_vf({{
auto fd = fmadd<ewt>(
fwiden(ftype_freg<et>(freg(Fs1_bits))),
fwiden(ftype<et>(Vs2_vu[i + offset])),
ftype<ewt>(Vs3_vwu[i]));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(wide(-scalar), wide(vs2), -old wide vd)
0x3d: vfwnmacc_vf({{
auto fd = fmadd<ewt>(
fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
fwiden(ftype<et>(Vs2_vu[i + offset])),
fneg(ftype<ewt>(Vs3_vwu[i])));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(wide(scalar), wide(vs2), -old wide vd)
0x3e: vfwmsac_vf({{
auto fd = fmadd<ewt>(
fwiden(ftype_freg<et>(freg(Fs1_bits))),
fwiden(ftype<et>(Vs2_vu[i + offset])),
fneg(ftype<ewt>(Vs3_vwu[i])));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
// fmadd(wide(-scalar), wide(vs2), old wide vd)
0x3f: vfwnmsac_vf({{
auto fd = fmadd<ewt>(
fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
fwiden(ftype<et>(Vs2_vu[i + offset])),
ftype<ewt>(Vs3_vwu[i]));
Vd_vwu[i] = fd.v;
}}, OPFVF, VectorFloatArithOp);
}
}
// OPMVX
0x6: decode VFUNCT6 {
// Averaging add with a scalar operand (OPMVX): the sum is formed in 128
// bits, rounded at bit position 1 and then halved.
format VectorIntFormat {
    // Unsigned averaging add
    0x08: vaaddu_vx({{
        __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu;
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vu[i] = res >> 1;
    }}, OPMVX, VectorIntegerArithOp);
    // Signed averaging add
    0x09: vaadd_vx({{
        __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi;
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vi[i] = res >> 1;
    }}, OPMVX, VectorIntegerArithOp);
}
// vslide1up.vx micro-op body: slide-up by exactly one element. The scalar
// is placed in element 0 of tmp_d0 by the micro-op that pairs source
// register 0 with destination register 0.
0x0e: VectorSlideUpFormat::vslide1up_vx({{
    const int slide = 1;
    const int elemsPerReg = vtype_VLMAX(machInst.vtype8,
        vlen, true);
    const int regDelta = vdIdx - vs2Idx;
    // Shift that remains inside this (source, destination) register pair.
    const int localShift = slide - regDelta * elemsPerReg;
    if (std::abs(localShift) < uint32_t(elemsPerReg)) {
        int count;
        int dstStart;
        int srcStart;
        if (localShift >= 0) {
            count = elemsPerReg - localShift;
            dstStart = localShift;
            srcStart = 0;
        } else {
            count = elemsPerReg + localShift;
            dstStart = 0;
            srcStart = -localShift;
        }
        // Mask bit index of the first destination element written.
        const int maskBase = dstStart + vdIdx * elemsPerReg;
        for (int k = 0;
                k < count && k + dstStart < microVl;
                ++k) {
            if (!this->vm && !elem_mask(v0, k + maskBase)) {
                continue;
            }
            Vd_vu[k + dstStart] = Vs2_vu[k + srcStart];
        }
        // TODO: dirty code
        const bool firstPair = vdIdx == 0 && vs2Idx == 0;
        if (firstPair && (this->vm || elem_mask(v0, 0))) {
            tmp_d0.as<vu>()[0] = Rs1_vu;
        }
    }
}}, OPIVX, VectorMiscOp);
// vslide1down.vx micro-op body: slide-down by exactly one element. The
// active element whose index equals machInst.vl - 1 receives the scalar
// instead of a slid value.
0x0f: VectorSlideDownFormat::vslide1down_vx({{
    const int slide = 1;
    const int elemsPerReg = vtype_VLMAX(machInst.vtype8,
        vlen, true);
    const int regDelta = vs2Idx - vdIdx;
    // Shift that remains inside this (source, destination) register pair.
    const int localShift = slide - regDelta * elemsPerReg;
    const int groupRegs = vtype_regs_per_group(vtype);
    if (std::abs(localShift) < uint32_t(elemsPerReg)) {
        const bool lastSource = groupRegs == vs2Idx + 1;
        int count;
        int dstStart;
        int srcStart;
        if (localShift >= 0) {
            count = elemsPerReg - localShift;
            dstStart = 0;
            srcStart = localShift;
        } else {
            count = elemsPerReg + localShift;
            dstStart = -localShift;
            srcStart = 0;
        }
        const int maskBase = vdIdx * elemsPerReg;
        vreg_t stagedReg;
        auto staged = stagedReg.as<vu>();
        for (int k = 0;
                k < count && k + dstStart < microVl;
                ++k) {
            staged[k + dstStart] = Vs2_vu[k + srcStart];
        }
        if (lastSource) {
            // Positions past the end of the source group read as zero.
            for (int k = count + dstStart; k < elemsPerReg; ++k) {
                staged[k] = 0;
            }
        }
        for (int k = dstStart; k < microVl; ++k) {
            if (!vm && !elem_mask(v0, k + maskBase)) {
                continue;
            }
            if (k + maskBase != machInst.vl - 1) {
                Vd_vu[k] = staged[k];
            } else {
                Vd_vu[k] = Rs1_vu;
            }
        }
    }
}}, OPIVX, VectorMiscOp);
// VRXUNARY0
// Only the encoding with the VS2 field equal to 0 and vm=1 is decoded here.
0x10: decode VS2 {
0x00: decode VM {
// The encodings corresponding to the masked versions
// (vm=0) of vmv.s.x are reserved.
// vmv.s.x: writes the scalar integer value to element 0 of vd, and does
// nothing when vl is zero.
0x1: VectorNonSplitFormat::vmv_s_x({{
if (this->vl) {
Vd_vu[0] = Rs1_vu;
}
}}, OPMVX, VectorMiscOp);
}
}
// Averaging subtract, divide/remainder, multiply and integer
// multiply-add with a scalar operand (OPMVX). In the multiply-add entries
// Vs3 supplies the old destination value.
format VectorIntFormat {
    // Unsigned averaging subtract: 128-bit difference, rounded, halved.
    0x0a: vasubu_vx({{
        __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu;
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vu[i] = res >> 1;
    }}, OPMVX, VectorIntegerArithOp);
    // Signed averaging subtract
    0x0b: vasub_vx({{
        __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi;
        // Use the dynamic fixed-point rounding mode instead of a
        // hard-coded mode 0, matching the scaling shifts in this file.
        res = int_rounding<__uint128_t>(
            res, xc->readMiscReg(MISCREG_VXRM), 1);
        Vd_vi[i] = res >> 1;
    }}, OPMVX, VectorIntegerArithOp);
    // Unsigned divide: vs2[i] / scalar
    0x20: vdivu_vx({{
        Vd_vu[i] = divu<vu>(Vs2_vu[i], Rs1_vu);
    }}, OPMVX, VectorIntegerArithOp);
    // Signed divide
    0x21: vdiv_vx({{
        Vd_vi[i] = div<vi>(Vs2_vi[i], Rs1_vi);
    }}, OPMVX, VectorIntegerArithOp);
    // Unsigned remainder
    0x22: vremu_vx({{
        Vd_vu[i] = remu<vu>(Vs2_vu[i], Rs1_vu);
    }}, OPMVX, VectorIntegerArithOp);
    // Signed remainder
    0x23: vrem_vx({{
        Vd_vi[i] = rem<vi>(Vs2_vi[i], Rs1_vi);
    }}, OPMVX, VectorIntegerArithOp);
    // High half of unsigned * unsigned
    0x24: vmulhu_vx({{
        Vd_vu[i] = mulhu<vu>(Vs2_vu[i], Rs1_vu);
    }}, OPMVX, VectorIntegerArithOp);
    // Low half of the product
    0x25: vmul_vx({{
        Vd_vi[i] = Vs2_vi[i] * Rs1_vi;
    }}, OPMVX, VectorIntegerArithOp);
    // High half of signed vs2 * unsigned scalar
    0x26: vmulhsu_vx({{
        Vd_vi[i] = mulhsu<vi>(Vs2_vi[i], Rs1_vu);
    }}, OPMVX, VectorIntegerArithOp);
    // High half of signed * signed
    0x27: vmulh_vx({{
        Vd_vi[i] = mulh<vi>(Vs2_vi[i], Rs1_vi);
    }}, OPMVX, VectorIntegerArithOp);
    // (old vd * scalar) + vs2
    0x29: vmadd_vx({{
        Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i];
    }}, OPMVX, VectorIntegerArithOp);
    // -(old vd * scalar) + vs2
    0x2b: vnmsub_vx({{
        Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i];
    }}, OPMVX, VectorIntegerArithOp);
    // (vs2 * scalar) + old vd
    0x2d: vmacc_vx({{
        Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i];
    }}, OPMVX, VectorIntegerArithOp);
    // -(vs2 * scalar) + old vd
    0x2f: vnmsac_vx({{
        Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i];
    }}, OPMVX, VectorIntegerArithOp);
}
// Widening integer operations with a scalar operand (OPMVX).
// Narrow inputs are converted to the wide types vwu/vwi before the
// operation and the result is written to the wide destination view at
// element i. Narrow vs2 inputs are read at element i + offset, while the
// _wx forms read an already-wide vs2 at element i.
format VectorIntWideningFormat {
// Unsigned widening add
0x30: vwaddu_vx({{
Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
// Signed widening add
0x31: vwadd_vx({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi);
}}, OPMVX, VectorIntegerArithOp);
// Unsigned widening subtract
0x32: vwsubu_vx({{
Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
// Signed widening subtract
0x33: vwsub_vx({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi);
}}, OPMVX, VectorIntegerArithOp);
// Wide vs2 + widened unsigned scalar
0x34: vwaddu_wx({{
Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
// Wide vs2 + widened signed scalar
0x35: vwadd_wx({{
Vd_vwi[i] = Vs2_vwi[i] + vwi(Rs1_vi);
}}, OPMVX, VectorIntegerArithOp);
// Wide vs2 - widened unsigned scalar
0x36: vwsubu_wx({{
Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
// Wide vs2 - widened signed scalar
0x37: vwsub_wx({{
Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi);
}}, OPMVX, VectorIntegerArithOp);
// Unsigned * unsigned widening multiply
0x38: vwmulu_vx({{
Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
// Signed vs2 * unsigned scalar widening multiply
0x3a: vwmulsu_vx({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwu(Rs1_vu);
}}, OPMVX, VectorIntegerArithOp);
// Signed * signed widening multiply
0x3b: vwmul_vx({{
Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi);
}}, OPMVX, VectorIntegerArithOp);
// Widening multiply-accumulate, unsigned * unsigned, plus old wide vd
0x3c: vwmaccu_vx({{
Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset])
+ Vs3_vwu[i];
}}, OPMVX, VectorIntegerArithOp);
// Widening multiply-accumulate, signed * signed, plus old wide vd
0x3d: vwmacc_vx({{
Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset])
+ Vs3_vwi[i];
}}, OPMVX, VectorIntegerArithOp);
// Widening multiply-accumulate, unsigned scalar * signed vs2
0x3e: vwmaccus_vx({{
Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset])
+ Vs3_vwi[i];
}}, OPMVX, VectorIntegerArithOp);
// Widening multiply-accumulate, signed scalar * unsigned vs2
0x3f: vwmaccsu_vx({{
Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset])
+ Vs3_vwi[i];
}}, OPMVX, VectorIntegerArithOp);
}
}
// Vector configuration instructions, selected by bit 31 and then bit 30.
// Each entry supplies two snippets: the first gathers the inputs
// (register indices, requested vl and vtype, vlen, vlmax, current vl) and
// the second commits new_vl and new_vtype to the destination register, VL
// and Vtype. new_vl and new_vtype are not defined in these snippets;
// presumably the VConfOp format computes them between the two - confirm
// against the format definition.
0x7: decode BIT31 {
format VConfOp {
// vsetvli: requested vl from the rs1 register, vtype from zimm11.
0x0: vsetvli({{
uint64_t rd_bits = RD;
uint64_t rs1_bits = RS1;
uint64_t requested_vl = Rs1_ud;
uint64_t requested_vtype = zimm11;
uint32_t vlen = VlenbBits * 8;
uint32_t vlmax = getVlmax(Vtype, vlen);
uint32_t current_vl = VL;
}}, {{
Rd_ud = new_vl;
VL = new_vl;
Vtype = new_vtype;
}}, VSetVlDeclare, VSetVliBranchTarget
, VectorConfigOp, IsUncondControl
, IsIndirectControl);
0x1: decode BIT30 {
// vsetvl: requested vl from the rs1 register, vtype from the rs2
// register.
0x0: vsetvl({{
uint64_t rd_bits = RD;
uint64_t rs1_bits = RS1;
uint64_t requested_vl = Rs1_ud;
uint64_t requested_vtype = Rs2_ud;
uint32_t vlen = VlenbBits * 8;
uint32_t vlmax = getVlmax(Vtype, vlen);
uint32_t current_vl = VL;
}}, {{
Rd_ud = new_vl;
VL = new_vl;
Vtype = new_vtype;
}}, VSetVlDeclare, VSetVlBranchTarget
, VectorConfigOp, IsUncondControl
, IsIndirectControl);
// vsetivli: requested vl from the immediate uimm, vtype from zimm10.
// rs1_bits is set to -1 because no rs1 register is involved.
0x1: vsetivli({{
uint64_t rd_bits = RD;
uint64_t rs1_bits = -1;
uint64_t requested_vl = uimm;
uint64_t requested_vtype = zimm10;
uint32_t vlen = VlenbBits * 8;
uint32_t vlmax = getVlmax(Vtype, vlen);
uint32_t current_vl = VL;
}}, {{
Rd_ud = new_vl;
VL = new_vl;
Vtype = new_vtype;
}}, VSetiVliDeclare, VSetiVliBranchTarget
, VectorConfigOp, IsUncondControl
, IsDirectControl);
}
}
}
}
}
// Conditional branches, selected by FUNCT3. Every entry sets the next PC
// to PC + imm when its condition holds and otherwise rewrites the next PC
// with its own value; both results pass through rvZext. Equality and
// signed comparisons use rvSext on the operands, unsigned comparisons use
// rvZext.
0x18: decode FUNCT3 {
format BOp {
// Branch if equal
0x0: beq({{
if (rvSext(Rs1) == rvSext(Rs2)) {
NPC = rvZext(PC + imm);
} else {
NPC = rvZext(NPC);
}
}}, IsDirectControl, IsCondControl);
// Branch if not equal
0x1: bne({{
if (rvSext(Rs1) != rvSext(Rs2)) {
NPC = rvZext(PC + imm);
} else {
NPC = rvZext(NPC);
}
}}, IsDirectControl, IsCondControl);
// Branch if signed less-than
0x4: blt({{
if (rvSext(Rs1_sd) < rvSext(Rs2_sd)) {
NPC = rvZext(PC + imm);
} else {
NPC = rvZext(NPC);
}
}}, IsDirectControl, IsCondControl);
// Branch if signed greater-than-or-equal
0x5: bge({{
if (rvSext(Rs1_sd) >= rvSext(Rs2_sd)) {
NPC = rvZext(PC + imm);
} else {
NPC = rvZext(NPC);
}
}}, IsDirectControl, IsCondControl);
// Branch if unsigned less-than
0x6: bltu({{
if (rvZext(Rs1) < rvZext(Rs2)) {
NPC = rvZext(PC + imm);
} else {
NPC = rvZext(NPC);
}
}}, IsDirectControl, IsCondControl);
// Branch if unsigned greater-than-or-equal
0x7: bgeu({{
if (rvZext(Rs1) >= rvZext(Rs2)) {
NPC = rvZext(PC + imm);
} else {
NPC = rvZext(NPC);
}
}}, IsDirectControl, IsCondControl);
}
}
// Indirect jump-and-link: the destination register receives the
// fall-through address, then the next PC becomes (imm + rs1 value) with
// bit 0 cleared.
0x19: decode FUNCT3 {
0x0: Jump::jalr({{
Rd = rvSext(NPC);
NPC = rvZext((imm + Rs1) & (~0x1));
}}, IsIndirectControl, IsUncondControl);
}
// Direct jump-and-link: the destination register receives the
// fall-through address, then the next PC becomes PC + imm.
0x1b: JOp::jal({{
Rd = rvSext(NPC);
NPC = rvZext(PC + imm);
}}, IsDirectControl, IsUncondControl);
0x1c: decode FUNCT3 {
format SystemOp {
0x0: decode FUNCT7 {
0x0: decode RS2 {
// ecall: raises a SyscallFault carrying the current privilege mode.
0x0: ecall({{
    const auto curPriv =
        (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
    return std::make_shared<SyscallFault>(curPriv);
}}, IsSerializeAfter, IsNonSpeculative, IsSyscall,
    No_OpClass);
// ebreak: raises a BreakpointFault built from the current PC state.
0x1: ebreak({{
    const auto &curPc = xc->pcState();
    return std::make_shared<BreakpointFault>(curPc);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
// uret: return from a user-level trap handler. Faults as an illegal
// instruction when the N bit of misa is clear; otherwise restores uie
// from upie, sets upie, and continues at the address held in uepc.
0x2: uret({{
    MISA misa = xc->readMiscReg(MISCREG_ISA);
    if (!misa.rvn) {
        // The message names this instruction (it previously said
        // "sret", which made the fault report misleading).
        return std::make_shared<IllegalInstFault>(
            "uret can't execute without N systems",
            machInst);
    }
    STATUS status = xc->readMiscReg(MISCREG_STATUS);
    status.uie = status.upie;
    status.upie = 1;
    xc->setMiscReg(MISCREG_STATUS, status);
    NPC = xc->readMiscReg(MISCREG_UEPC);
}}, IsSerializeAfter, IsNonSpeculative, IsReturn);
}
0x8: decode RS2 {
// sret: return from a supervisor-level trap handler.
// Faults as an illegal instruction when the S bit of misa is clear, when
// executed in user mode, or in supervisor mode with status.tsr set.
// Otherwise the privilege mode becomes status.spp, sie is restored from
// spie, spie is set, spp is reset to PRV_U, and execution continues at the
// address held in sepc.
// NOTE(review): the self-assignment of the next PC that follows the second
// return statement can never execute; presumably it is kept for the ISA
// parser's operand detection - confirm before removing it.
0x2: sret({{
MISA misa = xc->readMiscReg(MISCREG_ISA);
if (!misa.rvs) {
return std::make_shared<IllegalInstFault>(
"sret can't execute without RVS",
machInst);
}
STATUS status = xc->readMiscReg(MISCREG_STATUS);
auto pm = (PrivilegeMode)xc->readMiscReg(
MISCREG_PRV);
if (pm == PRV_U ||
(pm == PRV_S && status.tsr == 1)) {
return std::make_shared<IllegalInstFault>(
"sret in user mode or TSR enabled",
machInst);
NPC = NPC;
} else {
xc->setMiscReg(MISCREG_PRV, status.spp);
status.sie = status.spie;
status.spie = 1;
status.spp = PRV_U;
xc->setMiscReg(MISCREG_STATUS, status);
NPC = xc->readMiscReg(MISCREG_SEPC);
}
}}, IsSerializeAfter, IsNonSpeculative, IsReturn);
// wfi: wait for interrupt. When the S bit of misa is set, the instruction
// faults in user mode, and in supervisor mode if status.tw is set.
// Otherwise the thread is quiesced, but only while no interrupt and no
// NMI is pending.
0x5: wfi({{
    MISA isaReg = xc->readMiscReg(MISCREG_ISA);
    STATUS statusReg = xc->readMiscReg(MISCREG_STATUS);
    auto curPriv = (PrivilegeMode)xc->readMiscReg(
        MISCREG_PRV);
    const bool trapped = curPriv == PRV_U ||
        (curPriv == PRV_S && statusReg.tw == 1);
    if (isaReg.rvs && trapped) {
        return std::make_shared<IllegalInstFault>(
            "wfi in user mode or TW enabled",
            machInst);
    }
    // Sleep only when nothing at all is pending; masked interrupts
    // count as pending too.
    auto thread = xc->tcBase();
    auto intCtrl = dynamic_cast<RiscvISA::Interrupts*>(
        thread->getCpuPtr()->getInterruptController(
            thread->threadId()));
    panic_if(!intCtrl, "Invalid Interrupt Controller.");
    const bool nothingPending = intCtrl->readIP() == 0
        && xc->readMiscReg(MISCREG_NMIP) == 0;
    if (nothingPending) {
        thread->quiesce();
    }
}}, IsNonSpeculative, IsQuiesce,
    IsSerializeAfter, No_OpClass, IsSquashAfter);
}
// sfence.vma: faults as an illegal instruction when the S bit of misa is
// clear, in user mode, or in supervisor mode with status.tvm set.
// Otherwise it asks the MMU to demap using the two source register values.
0x9: sfence_vma({{
    MISA isaReg = xc->readMiscReg(MISCREG_ISA);
    if (!isaReg.rvs) {
        return std::make_shared<IllegalInstFault>(
            "sfence_vma can't execute without RVS",
            machInst);
    }
    STATUS statusReg = xc->readMiscReg(MISCREG_STATUS);
    auto curPriv = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
    const bool blocked = curPriv == PRV_U ||
        (curPriv == PRV_S && statusReg.tvm == 1);
    if (blocked) {
        return std::make_shared<IllegalInstFault>(
            "sfence in user mode or TVM enabled",
            machInst);
    }
    auto mmu = xc->tcBase()->getMMUPtr();
    mmu->demapPage(Rs1, Rs2);
}}, IsNonSpeculative, IsSerializeAfter, No_OpClass);
// mret: return from a machine-level trap handler.
// Faults as an illegal instruction unless the current privilege mode is
// PRV_M. Otherwise the privilege mode becomes status.mpp, MISCREG_NMIE is
// set to 1, mie is restored from mpie, mpie is set, mpp is reset to PRV_U,
// and execution continues at the address held in mepc.
// NOTE(review): the self-assignment of the next PC that follows the return
// statement can never execute; presumably it is kept for the ISA parser's
// operand detection - confirm before removing it.
0x18: mret({{
if (xc->readMiscReg(MISCREG_PRV) != PRV_M) {
return std::make_shared<IllegalInstFault>(
"mret at lower privilege", machInst);
NPC = NPC;
} else {
STATUS status = xc->readMiscReg(MISCREG_STATUS);
xc->setMiscReg(MISCREG_PRV, status.mpp);
xc->setMiscReg(MISCREG_NMIE, 1);
status.mie = status.mpie;
status.mpie = 1;
status.mpp = PRV_U;
xc->setMiscReg(MISCREG_STATUS, status);
NPC = xc->readMiscReg(MISCREG_MEPC);
}
}}, IsSerializeAfter, IsNonSpeculative, IsReturn);
}
}
// CSR read-modify-write instructions. In every entry the destination
// register first receives the current value of `data`, and `data` is then
// assigned its new value. `data` is not declared in these snippets;
// presumably the CSROp format binds it to the addressed CSR - confirm
// against the format definition.
format CSROp {
// New value is the rs1 register value
0x1: csrrw({{
Rd = rvSext(data);
data = rvZext(Rs1);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
// Set the bits given by the rs1 register value
0x2: csrrs({{
Rd = rvSext(data);
data = rvZext(data | Rs1);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
// Clear the bits given by the rs1 register value
0x3: csrrc({{
Rd = rvSext(data);
data = rvZext(data & ~Rs1);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
// New value is the immediate uimm
0x5: csrrwi({{
Rd = rvSext(data);
data = rvZext(uimm);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
// Set the bits given by the immediate uimm
0x6: csrrsi({{
Rd = rvSext(data);
data = rvZext(data | uimm);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
// Clear the bits given by the immediate uimm
0x7: csrrci({{
Rd = rvSext(data);
data = rvZext(data & ~uimm);
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
}
}
// Opcode 0x1e is handed to the M5Op format; no code snippet is supplied
// here, so the behaviour comes entirely from that format's definition.
0x1e: M5Op::M5Op();
}
}