This commit adds support for vector unit-stride segment store operations
(vssegXeXX) for RISC-V.

The implementation is based on two types of microops:
- VsSegIntrlv microops, which interleave the source registers into structs.
- VsSeg microops, which store the data to memory as contiguous structs of
  several fields.

Change-Id: Id80dd4e781743a60eb76c18b6a28061f8e9f723d
Gem5 issue: https://github.com/gem5/gem5/issues/382
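For reference, the following is a minimal scalar sketch of the architectural
behaviour of a unit-stride segment store (masking and the microop split are
omitted, and the names are illustrative, not the gem5 implementation added by
this commit): element i of field f is taken from source register group
vs3 + f and written to slot i * nf + f, so memory ends up holding an array of
nf-field structs.

    #include <cstdint>
    #include <cstring>

    // Scalar reference model of vsseg<nf>e<eew>_v. "vregs[f]" stands for
    // the elements of register group vs3 + f; "base" is the scalar address.
    template <typename T>
    void
    vssegStoreRef(uint8_t *base, const T *const vregs[], unsigned nf,
                  unsigned vl)
    {
        for (unsigned i = 0; i < vl; i++)
            for (unsigned f = 0; f < nf; f++)
                std::memcpy(base + (i * nf + f) * sizeof(T),
                            &vregs[f][i], sizeof(T));
    }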
// -*- mode:c++ -*-

// Copyright (c) 2015 RISC-V Foundation
// Copyright (c) 2017 The University of Virginia
// Copyright (c) 2020 Barkhausen Institut
// Copyright (c) 2021 StreamComputing Corp
// Copyright (c) 2022 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met: redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer;
// redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution;
// neither the name of the copyright holders nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

////////////////////////////////////////////////////////////////////
//
// The RISC-V ISA decoder
//

// In theory, all registers should be sign extended when not operating on the
// full MXLEN width, but doing so would produce out-of-range memory addresses,
// since addresses are always treated as uint64. So PC-related registers and
// memory addresses are zero extended, and everything else is sign extended.
decode QUADRANT default Unknown::unknown() {
|
|
0x0: decode COPCODE {
|
|
0x0: CIAddi4spnOp::c_addi4spn({{
|
|
imm = CIMM8<1:1> << 2 |
|
|
CIMM8<0:0> << 3 |
|
|
CIMM8<7:6> << 4 |
|
|
CIMM8<5:2> << 6;
|
|
}}, {{
|
|
if (imm == 0)
|
|
return std::make_shared<IllegalInstFault>("immediate = 0",
|
|
machInst);
|
|
Rp2 = rvSext(sp + imm);
|
|
}}, uint64_t);
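// The immediate above is the standard C.ADDI4SPN (CIW-format) bit shuffle of
// the 8-bit CIMM8 field into a zero-extended, 4-byte-aligned offset. A
// standalone sketch of the same shuffle, assuming cimm8 holds instruction
// bits 12..5 with cimm8 bit 0 = instruction bit 5 (illustrative helper, not
// part of the decoder):
//
//     uint64_t ciAddi4spnImm(uint64_t cimm8)
//     {
//         return ((cimm8 >> 1) & 0x1) << 2 |   // CIMM8<1>   -> imm[2]
//                ((cimm8 >> 0) & 0x1) << 3 |   // CIMM8<0>   -> imm[3]
//                ((cimm8 >> 6) & 0x3) << 4 |   // CIMM8<7:6> -> imm[5:4]
//                ((cimm8 >> 2) & 0xf) << 6;    // CIMM8<5:2> -> imm[9:6]
//     }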
|
|
format CompressedLoad {
|
|
0x1: c_fld({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
// Mutating any floating point register changes the FS bit
|
|
// of the STATUS CSR.
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
Fp2_bits = Mem;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x2: c_lw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
Rp2_sd = Mem_sw;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x0: c_flw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd = freg(f32(Mem_uw));
|
|
Fp2_bits = fd.v;
|
|
}}, {{
|
|
EA = (uint32_t)(Rp1_uw + offset);
|
|
}});
|
|
0x1: c_ld({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
Rp2_sd = Mem_sd;
|
|
}}, {{
|
|
EA = Rp1 + offset;
|
|
}});
|
|
}
|
|
}
|
|
0x4: decode CFUNCT6LOW3 {
|
|
format CompressedLoad {
|
|
0x0: c_lbu({{
|
|
offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
|
|
}}, {{
|
|
Rp2 = Mem_ub;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x1: decode CFUNCT1BIT6 {
|
|
0x0: c_lhu({{
|
|
offset = CIMM2<0:0> << 1;
|
|
}}, {{
|
|
Rp2 = Mem_uh;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x1: c_lh({{
|
|
offset = CIMM2<0:0> << 1;
|
|
}}, {{
|
|
Rp2_sd = Mem_sh;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
}
|
|
}
|
|
format CompressedStore {
|
|
0x2: c_sb({{
|
|
offset = (CIMM2<0:0> << 1) | CIMM2<1:1>;
|
|
}}, {{
|
|
Mem_ub = Rp2_ub;
|
|
}}, ea_code={{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x3: c_sh({{
|
|
offset = (CIMM2<0:0> << 1);
|
|
}}, {{
|
|
Mem_uh = Rp2_uh;
|
|
}}, ea_code={{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
}
|
|
}
|
|
format CompressedStore {
|
|
0x5: c_fsd({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem = Fp2_bits;
|
|
}}, {{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x6: c_sw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
Mem_uw = Rp2_uw;
|
|
}}, ea_code={{
|
|
EA = rvZext(Rp1 + offset);
|
|
}});
|
|
0x7: decode RVTYPE {
|
|
0x0: c_fsw({{
|
|
offset = CIMM2<1:1> << 2 |
|
|
CIMM3 << 3 |
|
|
CIMM2<0:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem_uw = unboxF32(boxF32(Fs2_bits));
|
|
}}, {{
|
|
EA = (uint32_t)(Rp1_uw + offset);
|
|
}});
|
|
0x1: c_sd({{
|
|
offset = CIMM3 << 3 | CIMM2 << 6;
|
|
}}, {{
|
|
Mem_ud = Rp2_ud;
|
|
}}, {{
|
|
EA = Rp1 + offset;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
0x1: decode COPCODE {
|
|
0x0: CIOp::c_addi({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5));
|
|
}}, {{
|
|
if ((RC1 == 0) != (imm == 0)) {
|
|
if (RC1 == 0) {
|
|
// imm != 0 is HINT
|
|
} else {
|
|
// imm == 0 is HINT
|
|
}
|
|
}
|
|
Rc1_sd = rvSext(Rc1_sd + imm);
|
|
}});
|
|
0x1: decode RVTYPE {
|
|
0x0: CJOp::c_jal({{
|
|
ra_sw = NPC_uw;
|
|
NPC_uw = PC_uw + imm;
|
|
}}, IsDirectControl, IsUncondControl, IsCall);
|
|
0x1: CIOp::c_addiw({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5));
|
|
}}, {{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
Rc1_sw = (int32_t)(Rc1_sw + imm);
|
|
}});
|
|
}
|
|
0x2: CIOp::c_li({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5));
|
|
}}, {{
|
|
// RC1 == 0 is HINT
|
|
Rc1_sd = imm;
|
|
}});
|
|
0x3: decode RC1 {
|
|
0x2: CIOp::c_addi16sp({{
|
|
imm = sext<10>((CIMM5<4:4> << 4) |
|
|
(CIMM5<0:0> << 5) |
|
|
(CIMM5<3:3> << 6) |
|
|
(CIMM5<2:1> << 7) |
|
|
(CIMM1 << 9));
|
|
}}, {{
|
|
if (imm == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"immediate = 0", machInst);
|
|
}
|
|
sp_sd = rvSext(sp_sd + imm);
|
|
}});
|
|
default: CIOp::c_lui({{
|
|
imm = sext<6>(CIMM5 | (CIMM1 << 5)) << 12;
|
|
}}, {{
|
|
// RC1 == 0 is HINT
|
|
if (imm == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"immediate = 0", machInst);
|
|
}
|
|
Rc1_sd = imm;
|
|
}});
|
|
}
|
|
0x4: decode CFUNCT2HIGH {
|
|
format CIOp {
|
|
0x0: c_srli({{
|
|
imm = CIMM5 | (CIMM1 << 5);
|
|
}}, {{
|
|
if (rvSelect((bool)CIMM1, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (imm == 0) {
|
|
// C.SRLI64, HINT for RV32/RV64
|
|
}
|
|
// The MSB can never be 1, hence no need to sign extend.
|
|
Rp1 = rvZext(Rp1) >> imm;
|
|
}}, uint64_t);
|
|
0x1: c_srai({{
|
|
imm = CIMM5 | (CIMM1 << 5);
|
|
}}, {{
|
|
if (rvSelect((bool)CIMM1, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (imm == 0) {
|
|
// C.SRAI64, HINT for RV32/RV64
|
|
}
|
|
Rp1_sd = rvSext(Rp1_sd) >> imm;
|
|
}}, uint64_t);
|
|
0x2: c_andi({{
|
|
imm = CIMM5;
|
|
if (CIMM1 > 0)
|
|
imm |= ~((uint64_t)0x1F);
|
|
}}, {{
|
|
Rp1 = rvSext(Rp1 & imm);
|
|
}}, uint64_t);
|
|
}
|
|
format CompressedROp {
|
|
0x3: decode CFUNCT1 {
|
|
0x0: decode CFUNCT2LOW {
|
|
0x0: c_sub({{
|
|
Rp1 = rvSext(Rp1 - Rp2);
|
|
}});
|
|
0x1: c_xor({{
|
|
Rp1 = rvSext(Rp1 ^ Rp2);
|
|
}});
|
|
0x2: c_or({{
|
|
Rp1 = rvSext(Rp1 | Rp2);
|
|
}});
|
|
0x3: c_and({{
|
|
Rp1 = rvSext(Rp1 & Rp2);
|
|
}});
|
|
}
|
|
0x1: decode CFUNCT2LOW {
|
|
0x0: decode RVTYPE {
|
|
0x1: c_subw({{
|
|
Rp1_sd = (int32_t)Rp1_sd - Rp2_sw;
|
|
}});
|
|
}
|
|
0x1: decode RVTYPE {
|
|
0x1: c_addw({{
|
|
Rp1_sd = (int32_t)Rp1_sd + Rp2_sw;
|
|
}});
|
|
}
|
|
0x2: c_mul({{
|
|
Rp1_sd = rvSext(Rp1_sd * Rp2_sd);
|
|
}}, IntMultOp);
|
|
0x3: decode RP2 {
|
|
0x0: c_zext_b({{
|
|
Rp1 = Rp1 & 0xFFULL;
|
|
}});
|
|
0x1: c_sext_b({{
|
|
Rp1 = sext<8>(Rp1 & 0xFFULL);
|
|
}});
|
|
0x2: c_zext_h({{
|
|
Rp1 = Rp1 & 0xFFFFULL;
|
|
}});
|
|
0x3: c_sext_h({{
|
|
Rp1 = sext<16>(Rp1 & 0xFFFFULL);
|
|
}});
|
|
0x4: decode RVTYPE {
|
|
0x1: c_zext_w({{
|
|
Rp1 = bits(Rp1, 31, 0);
|
|
}});
|
|
}
|
|
0x5: c_not({{
|
|
Rp1 = ~Rp1;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
0x5: CJOp::c_j({{
|
|
NPC = rvZext(PC + imm);
|
|
}}, IsDirectControl, IsUncondControl);
|
|
format CBOp {
|
|
0x6: c_beqz({{
|
|
if (rvSext(Rp1) == 0)
|
|
NPC = rvZext(PC + imm);
|
|
else
|
|
NPC = NPC;
|
|
}}, IsDirectControl, IsCondControl);
|
|
0x7: c_bnez({{
|
|
if (rvSext(Rp1) != 0)
|
|
NPC = rvZext(PC + imm);
|
|
else
|
|
NPC = NPC;
|
|
}}, IsDirectControl, IsCondControl);
|
|
}
|
|
}
|
|
0x2: decode COPCODE {
|
|
0x0: CIOp::c_slli({{
|
|
imm = CIMM5 | (CIMM1 << 5);
|
|
}}, {{
|
|
if (rvSelect((bool)CIMM1, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (imm == 0) {
|
|
// C.SLLI64, HINT for RV32/RV64
|
|
}
|
|
// RC1 == 0 is HINT
|
|
Rc1 = rvSext(Rc1 << imm);
|
|
}}, uint64_t);
|
|
format CompressedLoad {
|
|
0x1: c_fldsp({{
|
|
offset = CIMM5<4:3> << 3 |
|
|
CIMM1 << 5 |
|
|
CIMM5<2:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
Fc1_bits = Mem;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x2: c_lwsp({{
|
|
offset = CIMM5<4:2> << 2 |
|
|
CIMM1 << 5 |
|
|
CIMM5<1:0> << 6;
|
|
}}, {{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
Rc1_sw = Mem_sw;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x0: c_flwsp({{
|
|
offset = CIMM5<4:2> << 2 |
|
|
CIMM1 << 5 |
|
|
CIMM5<1:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f32(Mem_uw));
|
|
Fd_bits = fd.v;
|
|
}}, {{
|
|
EA = (uint32_t)(sp_uw + offset);
|
|
}});
|
|
0x1: c_ldsp({{
|
|
offset = CIMM5<4:3> << 3 |
|
|
CIMM1 << 5 |
|
|
CIMM5<2:0> << 6;
|
|
}}, {{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
Rc1_sd = Mem_sd;
|
|
}}, {{
|
|
EA = sp + offset;
|
|
}});
|
|
}
|
|
}
|
|
0x4: decode CFUNCT1 {
|
|
0x0: decode RC2 {
|
|
0x0: Jump::c_jr({{
|
|
if (RC1 == 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x0", machInst);
|
|
}
|
|
NPC = rvZext(Rc1);
|
|
}}, IsIndirectControl, IsUncondControl);
|
|
default: CROp::c_mv({{
|
|
// RC1 == 0 is HINT
|
|
Rc1 = rvSext(Rc2);
|
|
}});
|
|
}
|
|
0x1: decode RC2 {
|
|
0x0: decode RC1 {
|
|
0x0: SystemOp::c_ebreak({{
|
|
return std::make_shared<BreakpointFault>(
|
|
xc->pcState());
|
|
}}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
|
|
default: Jump::c_jalr({{
|
|
ra = rvSext(NPC);
|
|
NPC = rvZext(Rc1);
|
|
}}, IsIndirectControl, IsUncondControl, IsCall);
|
|
}
|
|
default: CompressedROp::c_add({{
|
|
// RC1 == 0 is HINT
|
|
Rc1_sd = rvSext(Rc1_sd + Rc2_sd);
|
|
}});
|
|
}
|
|
}
|
|
format CompressedStore {
|
|
0x5: c_fsdsp({{
|
|
offset = CIMM6<5:3> << 3 |
|
|
CIMM6<2:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem_ud = Fc2_bits;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x6: c_swsp({{
|
|
offset = CIMM6<5:2> << 2 |
|
|
CIMM6<1:0> << 6;
|
|
}}, {{
|
|
Mem_uw = Rc2_uw;
|
|
}}, {{
|
|
EA = rvZext(sp + offset);
|
|
}});
|
|
0x7: decode RVTYPE {
|
|
0x0: c_fswsp({{
|
|
offset = CIMM6<5:2> << 2 |
|
|
CIMM6<1:0> << 6;
|
|
}}, {{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>("FPU is off",
|
|
machInst);
|
|
|
|
Mem_uw = unboxF32(boxF32(Fs2_bits));
|
|
}}, {{
|
|
EA = (uint32_t)(sp_uw + offset);
|
|
}});
|
|
0x1: c_sdsp({{
|
|
offset = CIMM6<5:3> << 3 |
|
|
CIMM6<2:0> << 6;
|
|
}}, {{
|
|
Mem = Rc2;
|
|
}}, {{
|
|
EA = sp + offset;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
0x3: decode OPCODE5 {
|
|
0x00: decode FUNCT3 {
|
|
format Load {
|
|
0x0: lb({{
|
|
Rd_sd = Mem_sb;
|
|
}});
|
|
0x1: lh({{
|
|
Rd_sd = Mem_sh;
|
|
}});
|
|
0x2: lw({{
|
|
Rd_sd = Mem_sw;
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x1: ld({{
|
|
Rd_sd = Mem_sd;
|
|
}});
|
|
}
|
|
0x4: lbu({{
|
|
Rd = Mem_ub;
|
|
}});
|
|
0x5: lhu({{
|
|
Rd = Mem_uh;
|
|
}});
|
|
0x6: decode RVTYPE {
|
|
0x1: lwu({{
|
|
Rd = Mem_uw;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
0x01: decode FUNCT3 {
|
|
format Load {
|
|
0x1: flh({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f16(Mem_uh));
|
|
Fd_bits = fd.v;
|
|
}}, inst_flags=FloatMemReadOp);
|
|
0x2: flw({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f32(Mem_uw));
|
|
Fd_bits = fd.v;
|
|
}}, inst_flags=FloatMemReadOp);
|
|
0x3: fld({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
status.fs = FPUStatus::DIRTY;
|
|
xc->setMiscReg(MISCREG_STATUS, status);
|
|
|
|
freg_t fd;
|
|
fd = freg(f64(Mem));
|
|
Fd_bits = fd.v;
|
|
}}, inst_flags=FloatMemReadOp);
|
|
}
|
|
|
|
0x0: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle8_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
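// Masked unit-stride load with the register-undisturbed policy: inactive or
// tail elements (mask bit clear, or i >= microVl) keep their previous value,
// which this microop reads back through Vs2 (presumably the old destination
// group). In scalar form:
//
//     Vd[i] = (active(i) && i < microVl) ? mem[i] : Vd_old[i];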
|
|
format VlSegOp {
|
|
0x01: vlseg2e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e8_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
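// Unit-stride segment loads treat memory as an array of nf-field structs:
// vlseg3e8.v v4, (a0) on packed RGB bytes, for example, gathers all R bytes
// into v4, all G bytes into v5 and all B bytes into v6. A scalar sketch of
// the architectural behaviour (masking omitted; vreg[] is illustrative):
//
//     for (unsigned i = 0; i < vl; i++)
//         for (unsigned f = 0; f < 3; f++)
//             vreg[4 + f][i] = mem[base + i * 3 + f];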
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re8_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x0b: VlmOp::vlm_v({{
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
}}, inst_flags=VectorUnitStrideMaskLoadOp);
|
|
0x10: VleOp::vle8ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_ub[i] = Mem_vc.as<uint8_t>()[i];
|
|
} else {
|
|
Vd_ub[i] = Vs2_ub[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
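// Fault-only-first: only element 0 may take a trap; if a later element would
// fault, vl is instead reduced to the number of elements already loaded. The
// extra "i < this->faultIdx" guard models that by leaving the destination
// undisturbed from (what appears to be) the first faulting element onwards:
//
//     Vd[i] = (active(i) && i < microVl && i < faultIdx) ? mem[i] : Vd_old[i];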
|
|
}
|
|
0x1: VlIndexOp::vluxei8_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
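// Indexed (gather) loads: each element address is the scalar base plus the
// matching unsigned index element from vs2, so per element the behaviour is
// simply
//
//     EA     = Rs1 + (uint64_t)Vs2_index[ei];
//     Vd[ei] = *(const T *)EA;
//
// vlux (unordered) and vlox (ordered) share this element body and differ
// only in the ordering constraints on the memory accesses.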
|
|
0x2: VlStrideOp::vlse8_v({{
|
|
Vd_ub[microIdx] = Mem_vc.as<uint8_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei8_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
0x5: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle16_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
|
|
format VlSegOp {
|
|
0x01: vlseg2e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e16_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re16_v({{
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x10: VleOp::vle16ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_uh[i] = Mem_vc.as<uint16_t>()[i];
|
|
} else {
|
|
Vd_uh[i] = Vs2_uh[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
|
}
|
|
0x1: VlIndexOp::vluxei16_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
0x2: VlStrideOp::vlse16_v({{
|
|
Vd_uh[microIdx] = Mem_vc.as<uint16_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei16_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
0x6: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle32_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
|
|
format VlSegOp {
|
|
0x01: vlseg2e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e32_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re32_v({{
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x10: VleOp::vle32ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_uw[i] = Mem_vc.as<uint32_t>()[i];
|
|
} else {
|
|
Vd_uw[i] = Vs2_uw[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
|
}
|
|
0x1: VlIndexOp::vluxei32_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
0x2: VlStrideOp::vlse32_v({{
|
|
Vd_uw[microIdx] = Mem_vc.as<uint32_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei32_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
0x7: decode MOP {
|
|
0x0: decode LUMOP {
|
|
0x00: decode NF {
|
|
0x00: VleOp::vle64_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideLoadOp);
|
|
format VlSegOp {
|
|
0x01: vlseg2e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 2)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x02: vlseg3e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 3)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x03: vlseg4e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 4)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x04: vlseg5e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 5)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x05: vlseg6e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 6)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x06: vlseg7e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 7)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
0x07: vlseg8e64_v({{
|
|
if ((machInst.vm || elem_mask_vseg(v0, ei + (field * micro_elems), 8)) &&
|
|
i < this->microVl) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideSegmentedLoadOp);
|
|
}
|
|
}
|
|
0x08: decode NF {
|
|
format VlWholeOp {
|
|
0x0: vl1re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x1: vl2re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x3: vl4re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
0x7: vl8re64_v({{
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
}}, inst_flags=VectorWholeRegisterLoadOp);
|
|
}
|
|
}
|
|
0x10: VleOp::vle64ff_v({{
|
|
if ((machInst.vm || elem_mask(v0, ei)) &&
|
|
i < this->microVl && i < this->faultIdx) {
|
|
Vd_ud[i] = Mem_vc.as<uint64_t>()[i];
|
|
} else {
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}
|
|
}}, inst_flags=VectorUnitStrideFaultOnlyFirstLoadOp);
|
|
}
|
|
0x1: VlIndexOp::vluxei64_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
0x2: VlStrideOp::vlse64_v({{
|
|
Vd_ud[microIdx] = Mem_vc.as<uint64_t>()[0];
|
|
}}, inst_flags=VectorStridedLoadOp);
|
|
0x3: VlIndexOp::vloxei64_v({{
|
|
Vd_vu[vdElemIdx] = Mem_vc.as<vu>()[0];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedLoadOp);
|
|
}
|
|
}
|
|
|
|
0x03: decode FUNCT3 {
|
|
format FenceOp {
|
|
0x0: fence({{
|
|
}}, uint64_t, IsReadBarrier, IsWriteBarrier, No_OpClass);
|
|
0x1: fence_i({{
|
|
}}, uint64_t, IsNonSpeculative, IsSerializeAfter,
|
|
IsSquashAfter, No_OpClass);
|
|
}
|
|
|
|
0x2: decode FUNCT12 {
|
|
format CBMOp {
|
|
0x0: cbo_inval({{
|
|
Mem = 0;
|
|
}}, mem_flags=[INVALIDATE, DST_POC]);
|
|
0x1: cbo_clean({{
|
|
Mem = 0;
|
|
}}, mem_flags=[CLEAN, DST_POC]);
|
|
0x2: cbo_flush({{
|
|
Mem = 0;
|
|
}}, mem_flags=[CLEAN, INVALIDATE, DST_POC]);
|
|
0x4: cbo_zero({{
|
|
Mem = 0;
|
|
}}, mem_flags=[CACHE_BLOCK_ZERO]);
|
|
}
|
|
}
|
|
}
|
|
|
|
0x04: decode FUNCT3 {
|
|
0x1: decode FS3 {
|
|
format IOp {
|
|
0x00: slli({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
Rd = rvSext(Rs1 << imm);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x01: decode RVTYPE {
|
|
0x0: zip({{
|
|
Rd_sw = _rvk_emu_zip_32(Rs1_sw);
|
|
}}, imm_code = {{ imm = SHAMT5; }});
|
|
}
|
|
0x02: decode FS2 {
|
|
0x0: sha256sum0({{
|
|
Rd_sw = _rvk_emu_sha256sum0(Rs1_sw);
|
|
}});
|
|
0x1: sha256sum1({{
|
|
Rd_sw = _rvk_emu_sha256sum1(Rs1_sw);
|
|
}});
|
|
0x2: sha256sig0({{
|
|
Rd_sw = _rvk_emu_sha256sig0(Rs1_sw);
|
|
}});
|
|
0x3: sha256sig1({{
|
|
Rd_sw = _rvk_emu_sha256sig1(Rs1_sw);
|
|
}});
|
|
0x4: decode RVTYPE {
|
|
0x1: sha512sum0({{
|
|
Rd_sd = _rvk_emu_sha512sum0(Rs1_sd);
|
|
}});
|
|
}
|
|
0x5: decode RVTYPE {
|
|
0x1: sha512sum1({{
|
|
Rd_sd = _rvk_emu_sha512sum1(Rs1_sd);
|
|
}});
|
|
}
|
|
0x6: decode RVTYPE {
|
|
0x1: sha512sig0({{
|
|
Rd_sd = _rvk_emu_sha512sig0(Rs1_sd);
|
|
}});
|
|
}
|
|
0x7: decode RVTYPE {
|
|
0x1: sha512sig1({{
|
|
Rd_sd = _rvk_emu_sha512sig1(Rs1_sd);
|
|
}});
|
|
}
|
|
0x8: sm3p0({{
|
|
Rd_sw = _rvk_emu_sm3p0(Rs1_sw);
|
|
}});
|
|
0x9: sm3p1({{
|
|
Rd_sw = _rvk_emu_sm3p1(Rs1_sw);
|
|
}});
|
|
}
|
|
0x05: bseti({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 | (UINT64_C(1) << index));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x06: decode BIT24 {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64im({{
|
|
Rd_sd = _rvk_emu_aes64im(Rs1_sd);
|
|
}});
|
|
}
|
|
0x1: decode RVTYPE {
|
|
0x1: aes64ks1i({{
|
|
Rd_sd = _rvk_emu_aes64ks1i(Rs1_sd, imm);
|
|
}}, imm_type = int32_t, imm_code={{ imm = RNUM; }});
|
|
}
|
|
}
|
|
0x09: bclri({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 & (~(UINT64_C(1) << index)));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x0d: binvi({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 ^ (UINT64_C(1) << index));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
}
|
|
format ROp {
|
|
0x0c: decode RS2 {
|
|
0x00: clz({{
|
|
Rd = (machInst.rv_type == RV32) ? clz32(Rs1) : clz64(Rs1);
|
|
}});
|
|
0x01: ctz({{
|
|
Rd = (machInst.rv_type == RV32) ? ctz32(Rs1) : ctz64(Rs1);
|
|
}});
|
|
0x02: cpop({{
|
|
Rd = (machInst.rv_type == RV32) ? popCount(Rs1<31:0>) : popCount(Rs1);
|
|
}});
|
|
0x04: sext_b({{
|
|
Rd = sext<8>(Rs1_ub);
|
|
}});
|
|
0x05: sext_h({{
|
|
Rd = sext<16>(Rs1_uh);
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
format IOp {
|
|
0x0: addi({{
|
|
Rd_sd = rvSext(Rs1_sd + imm);
|
|
}});
|
|
0x2: slti({{
|
|
Rd = (rvSext(Rs1_sd) < imm) ? 1 : 0;
|
|
}});
|
|
0x3: sltiu({{
|
|
Rd = (rvZext(Rs1) < imm) ? 1 : 0;
|
|
}}, uint64_t, imm_code = {{ imm = rvZext(sext<12>(IMM12)); }});
|
|
0x4: xori({{
|
|
Rd = rvSext(Rs1 ^ imm);
|
|
}}, uint64_t);
|
|
0x5: decode FS3 {
|
|
0x0: srli({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
Rd = rvSext(rvZext(Rs1) >> imm);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x1: decode RVTYPE {
|
|
0x0: unzip({{
|
|
Rd_sw = _rvk_emu_unzip_32(Rs1_sw);
|
|
}}, imm_code = {{ imm = SHAMT5; }});
|
|
}
|
|
0x5: orc_b({{
|
|
uint64_t result = 0;
|
|
result |= (Rs1<7:0> ? UINT64_C(0xff) : 0x0);
|
|
result |= (Rs1<15:8> ? UINT64_C(0xff) : 0x0) << 8;
|
|
result |= (Rs1<23:16> ? UINT64_C(0xff) : 0x0) << 16;
|
|
result |= (Rs1<31:24> ? UINT64_C(0xff) : 0x0) << 24;
|
|
result |= (Rs1<39:32> ? UINT64_C(0xff) : 0x0) << 32;
|
|
result |= (Rs1<47:40> ? UINT64_C(0xff) : 0x0) << 40;
|
|
result |= (Rs1<55:48> ? UINT64_C(0xff) : 0x0) << 48;
|
|
result |= (Rs1<63:56> ? UINT64_C(0xff) : 0x0) << 56;
|
|
Rd = rvSext(result);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
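// orc.b (Zbb): every output byte is 0xff if the corresponding input byte is
// non-zero and 0x00 otherwise, e.g. orc_b(0x001234000000ab00) =
// 0x00ffff000000ff00. This is commonly used to locate a NUL byte in
// word-at-a-time string scanning.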
|
|
0x8: srai({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
Rd_sd = rvSext(Rs1_sd) >> imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0x9: bexti({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t index = imm & rvSelect(32 - 1, 64 - 1);
|
|
Rd = (Rs1 >> index) & 0x1;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0xc: rori({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
Rd = rvSext((rvZext(Rs1) >> imm)
|
|
| (Rs1 << ((xlen - imm) & (xlen - 1))));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
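// Rotate-right with the shift reduced mod XLEN; the (xlen - imm) & (xlen - 1)
// term keeps the left-shift count legal when imm is 0. A plain C++ sketch of
// the RV64 case:
//
//     uint64_t ror64(uint64_t x, unsigned sh)
//     {
//         sh &= 63;
//         return (x >> sh) | (x << ((64 - sh) & 63));
//     }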
|
|
0xd: decode RS2 {
|
|
0x18: ROp::rev8({{
|
|
if (rvSelect((bool)SHAMT6BIT5, false)) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"shmat[5] != 0", machInst);
|
|
}
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_grev_32(Rs1_sd, 0x18);
|
|
} else {
|
|
Rd_sd = _rvk_emu_grev_64(Rs1_sd, 0x38);
|
|
}
|
|
}});
|
|
0x07: ROp::brev8({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_brev8_32(Rs1_sd);
|
|
} else {
|
|
Rd_sd = _rvk_emu_brev8_64(Rs1_sd);
|
|
}
|
|
}});
|
|
}
|
|
}
|
|
0x6: ori({{
|
|
Rd = rvSext(Rs1 | imm);
|
|
}}, uint64_t);
|
|
0x7: andi({{
|
|
Rd = rvSext(Rs1 & imm);
|
|
}}, uint64_t);
|
|
}
|
|
}
|
|
|
|
0x05: UOp::auipc({{
|
|
Rd = rvSext(PC + (sext<20>(imm) << 12));
|
|
}});
|
|
|
|
0x06: decode RVTYPE {
|
|
0x1: decode FUNCT3 {
|
|
format IOp {
|
|
0x0: addiw({{
|
|
Rd_sw = (int32_t)(Rs1_sw + imm);
|
|
}}, int32_t);
|
|
0x1: decode FS3 {
|
|
0x0: slliw({{
|
|
Rd_sd = Rs1_sw << imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
0x1: slli_uw({{
|
|
Rd = ((uint64_t)(Rs1_uw)) << imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT6; }});
|
|
0xc: decode FS2 {
|
|
0x0: clzw({{
|
|
Rd = clz32(Rs1);
|
|
}});
|
|
0x1: ctzw({{
|
|
Rd = ctz32(Rs1);
|
|
}});
|
|
0x2: cpopw({{
|
|
Rd = popCount(Rs1<31:0>);
|
|
}});
|
|
}
|
|
}
|
|
0x5: decode FS3 {
|
|
0x0: srliw({{
|
|
Rd_sd = (int32_t)(Rs1_uw >> imm);
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
0x8: sraiw({{
|
|
Rd_sd = Rs1_sw >> imm;
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
0xc: roriw({{
|
|
Rd = (int32_t) ((Rs1_uw >> imm) | (Rs1_uw << ((32 - imm) & (32 - 1))));
|
|
}}, imm_type = uint64_t, imm_code = {{ imm = SHAMT5; }});
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
0x08: decode FUNCT3 {
|
|
format Store {
|
|
0x0: sb({{
|
|
Mem_ub = Rs2_ub;
|
|
}});
|
|
0x1: sh({{
|
|
Mem_uh = Rs2_uh;
|
|
}});
|
|
0x2: sw({{
|
|
Mem_uw = Rs2_uw;
|
|
}});
|
|
0x3: decode RVTYPE {
|
|
0x1: sd({{
|
|
Mem_ud = Rs2_ud;
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
0x09: decode FUNCT3 {
|
|
format Store {
|
|
0x1: fsh({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
Mem_uh = unboxF16(boxF16(Fs2_bits));
|
|
}}, inst_flags=FloatMemWriteOp);
|
|
0x2: fsw({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
Mem_uw = unboxF32(boxF32(Fs2_bits));
|
|
}}, inst_flags=FloatMemWriteOp);
|
|
0x3: fsd({{
|
|
STATUS status = xc->readMiscReg(MISCREG_STATUS);
|
|
if (status.fs == FPUStatus::OFF)
|
|
return std::make_shared<IllegalInstFault>(
|
|
"FPU is off", machInst);
|
|
|
|
Mem_ud = Fs2_bits;
|
|
}}, inst_flags=FloatMemWriteOp);
|
|
}
|
|
|
|
0x0: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e8_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
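// Per the commit message, each vsseg<nf>eXX macroop is broken into
// VsSegIntrlv microops, which gather the nf source register groups into
// field-interleaved (struct) order, followed by VsSeg microops that write
// the resulting contiguous structs with unit-stride accesses. A hypothetical
// sketch of the interleave step (names illustrative, not the actual microop
// implementation):
//
//     for (unsigned i = 0; i < microVl; i++)
//         for (unsigned f = 0; f < nf; f++)
//             interleaved[i * nf + f] = vs3_field[f][i];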
|
|
format VsWholeOp {
|
|
0x8: decode NF {
|
|
0x0: vs1r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
0x1: vs2r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
0x3: vs4r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
0x7: vs8r_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorWholeRegisterStoreOp);
|
|
}
|
|
}
|
|
0x0b: VsmOp::vsm_v({{
|
|
Mem_vc.as<uint8_t>()[i] = Vs3_ub[i];
|
|
}}, inst_flags=VectorUnitStrideMaskStoreOp);
|
|
}
|
|
0x1: VsIndexOp::vsuxei8_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse8_v({{
|
|
Mem_vc.as<uint8_t>()[0] = Vs3_ub[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei8_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ub[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
0x5: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e16_v({{
|
|
Mem_vc.as<uint16_t>()[i] = Vs3_uh[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
|
|
}
|
|
0x1: VsIndexOp::vsuxei16_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse16_v({{
|
|
Mem_vc.as<uint16_t>()[0] = Vs3_uh[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei16_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uh[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
0x6: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e32_v({{
|
|
Mem_vc.as<uint32_t>()[i] = Vs3_uw[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
|
|
}
|
|
0x1: VsIndexOp::vsuxei32_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse32_v({{
|
|
Mem_vc.as<uint32_t>()[0] = Vs3_uw[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei32_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_uw[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
0x7: decode MOP {
|
|
0x0: decode SUMOP {
|
|
0x00: decode NF {
|
|
0x00: VseOp::vse64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideStoreOp);
|
|
format VsSegOp {
|
|
0x01: vsseg2e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x02: vsseg3e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x03: vsseg4e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x04: vsseg5e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x05: vsseg6e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x06: vsseg7e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
0x07: vsseg8e64_v({{
|
|
Mem_vc.as<uint64_t>()[i] = Vs3_ud[i];
|
|
}}, inst_flags=VectorUnitStrideSegmentedStoreOp);
|
|
}
|
|
}
|
|
}
|
|
0x1: VsIndexOp::vsuxei64_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
0x2: VsStrideOp::vsse64_v({{
|
|
Mem_vc.as<uint64_t>()[0] = Vs3_ud[microIdx];
|
|
}}, inst_flags=VectorStridedStoreOp);
|
|
0x3: VsIndexOp::vsoxei64_v({{
|
|
Mem_vc.as<vu>()[0] = Vs3_vu[vs3ElemIdx];
|
|
}}, {{
|
|
EA = Rs1 + Vs2_ud[vs2ElemIdx];
|
|
}}, inst_flags=VectorIndexedStoreOp);
|
|
}
|
|
}
|
|
|
|
0x0b: decode FUNCT3 {
|
|
0x2: decode AMOFUNCT {
|
|
0x2: LoadReserved::lr_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, mem_flags=LLSC);
|
|
0x3: StoreCond::sc_w({{
|
|
Mem_uw = Rs2_uw;
|
|
}}, {{
|
|
Rd = rvSext(result);
|
|
}}, inst_flags=IsStoreConditional, mem_flags=LLSC);
|
|
0x0: AtomicMemOp::amoadd_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int32_t> *amo_op =
|
|
new AtomicGenericOp<int32_t>(Rs2_sw,
|
|
[](int32_t* b, int32_t a){ *b += a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1: AtomicMemOp::amoswap_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x4: AtomicMemOp::amoxor_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b ^= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x8: AtomicMemOp::amoor_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b |= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0xc: AtomicMemOp::amoand_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ *b &= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x10: AtomicMemOp::amomin_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int32_t> *amo_op =
|
|
new AtomicGenericOp<int32_t>(Rs2_sw,
|
|
[](int32_t* b, int32_t a){ if (a < *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x14: AtomicMemOp::amomax_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int32_t> *amo_op =
|
|
new AtomicGenericOp<int32_t>(Rs2_sw,
|
|
[](int32_t* b, int32_t a){ if (a > *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x18: AtomicMemOp::amominu_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ if (a < *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1c: AtomicMemOp::amomaxu_w({{
|
|
Rd_sd = Mem_sw;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint32_t> *amo_op =
|
|
new AtomicGenericOp<uint32_t>(Rs2_uw,
|
|
[](uint32_t* b, uint32_t a){ if (a > *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
}
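// Each AMO above is modelled as a load of the old memory value into Rd plus
// an AtomicGenericOp functor that the memory system applies atomically to
// the memory location. A sketch of the add case, mirroring the lambda used
// in amoadd_w:
//
//     auto *amo_op = new AtomicGenericOp<int32_t>(
//         Rs2_sw, [](int32_t *mem, int32_t src) { *mem += src; });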
|
|
0x3: decode RVTYPE {
|
|
0x1: decode AMOFUNCT {
|
|
0x2: LoadReserved::lr_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, mem_flags=LLSC);
|
|
0x3: StoreCond::sc_d({{
|
|
Mem = Rs2;
|
|
}}, {{
|
|
Rd = result;
|
|
}}, mem_flags=LLSC, inst_flags=IsStoreConditional);
|
|
0x0: AtomicMemOp::amoadd_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int64_t> *amo_op =
|
|
new AtomicGenericOp<int64_t>(Rs2_sd,
|
|
[](int64_t* b, int64_t a){ *b += a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1: AtomicMemOp::amoswap_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x4: AtomicMemOp::amoxor_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b ^= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x8: AtomicMemOp::amoor_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b |= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0xc: AtomicMemOp::amoand_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){ *b &= a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x10: AtomicMemOp::amomin_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int64_t> *amo_op =
|
|
new AtomicGenericOp<int64_t>(Rs2_sd,
|
|
[](int64_t* b, int64_t a){ if (a < *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x14: AtomicMemOp::amomax_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<int64_t> *amo_op =
|
|
new AtomicGenericOp<int64_t>(Rs2_sd,
|
|
[](int64_t* b, int64_t a){ if (a > *b) *b = a; });
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x18: AtomicMemOp::amominu_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){
|
|
if (a < *b) *b = a;
|
|
});
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
0x1c: AtomicMemOp::amomaxu_d({{
|
|
Rd_sd = Mem_sd;
|
|
}}, {{
|
|
TypedAtomicOpFunctor<uint64_t> *amo_op =
|
|
new AtomicGenericOp<uint64_t>(Rs2_ud,
|
|
[](uint64_t* b, uint64_t a){
|
|
if (a > *b) *b = a;
|
|
});
|
|
}}, mem_flags=ATOMIC_RETURN_OP);
|
|
}
|
|
}
|
|
}
|
|
0x0c: decode FUNCT3 {
|
|
format ROp {
|
|
0x0: decode KFUNCT5 {
|
|
0x00: decode BS {
|
|
0x0: add({{
|
|
Rd = rvSext(Rs1_sd + Rs2_sd);
|
|
}});
|
|
0x1: sub({{
|
|
Rd = rvSext(Rs1_sd - Rs2_sd);
|
|
}});
|
|
}
|
|
0x01: decode BS {
|
|
0x0: mul({{
|
|
Rd = rvSext(Rs1_sd * Rs2_sd);
|
|
}}, IntMultOp);
|
|
}
|
|
0x08: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sum0r({{
|
|
Rd_sw = _rvk_emu_sha512sum0r(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x09: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sum1r({{
|
|
Rd_sw = _rvk_emu_sha512sum1r(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0a: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig0l({{
|
|
Rd_sw = _rvk_emu_sha512sig0l(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0b: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig1l({{
|
|
Rd_sw = _rvk_emu_sha512sig1l(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0e: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig0h({{
|
|
Rd_sw = _rvk_emu_sha512sig0h(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x0f: decode BS {
|
|
0x1: decode RVTYPE {
|
|
0x0: sha512sig1h({{
|
|
Rd_sw = _rvk_emu_sha512sig1h(Rs1_sw, Rs2_sw);
|
|
}});
|
|
}
|
|
}
|
|
0x11: decode RVTYPE {
|
|
0x0: BSOp::aes32esi({{
|
|
Rd_sw = _rvk_emu_aes32esi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x13: decode RVTYPE {
|
|
0x0: BSOp::aes32esmi({{
|
|
Rd_sw = _rvk_emu_aes32esmi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x15: decode RVTYPE {
|
|
0x0: BSOp::aes32dsi({{
|
|
Rd_sw = _rvk_emu_aes32dsi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x17: decode RVTYPE {
|
|
0x0: BSOp::aes32dsmi({{
|
|
Rd_sw = _rvk_emu_aes32dsmi(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
}
|
|
0x18: BSOp::sm4ed({{
|
|
Rd_sw = _rvk_emu_sm4ed(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
0x19: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64es({{
|
|
Rd_sd = _rvk_emu_aes64es(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
0x1a: BSOp::sm4ks({{
|
|
Rd_sw = _rvk_emu_sm4ks(Rs1_sw, Rs2_sw, bs);
|
|
}});
|
|
0x1b: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64esm({{
|
|
Rd_sd = _rvk_emu_aes64esm(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
0x1d: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64ds({{
|
|
Rd_sd = _rvk_emu_aes64ds(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
0x1f: decode BS {
|
|
0x0: decode RVTYPE {
|
|
0x1: aes64dsm({{
|
|
Rd_sd = _rvk_emu_aes64dsm(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
0x1: decode RVTYPE {
|
|
0x1: aes64ks2({{
|
|
Rd_sd = _rvk_emu_aes64ks2(Rs1_sd, Rs2_sd);
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
0x1: decode FUNCT7 {
|
|
0x0: sll({{
|
|
Rd = rvSext(Rs1 << rvSelect(Rs2<4:0>, Rs2<5:0>));
|
|
}});
|
|
0x1: mulh({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = mulh<int32_t>(Rs1_sd, Rs2_sd);
|
|
} else {
|
|
Rd_sd = mulh<int64_t>(Rs1_sd, Rs2_sd);
|
|
}
|
|
}}, IntMultOp);
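// mulh returns the upper XLEN bits of the signed product. For RV64 the same
// result can be sketched with a 128-bit intermediate (assuming a compiler
// that provides __int128):
//
//     int64_t mulh64(int64_t a, int64_t b)
//     {
//         return (int64_t)(((__int128)a * (__int128)b) >> 64);
//     }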
|
|
0x5: clmul({{
|
|
uint64_t result = 0;
|
|
for (int i = 0; i < rvSelect(32, 64); i++) {
|
|
if ((Rs2 >> i) & 1) {
|
|
result ^= Rs1 << i;
|
|
}
|
|
}
|
|
Rd = rvSext(result);
|
|
}});
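// Carry-less multiply: partial products are combined with XOR instead of
// addition, exactly as in the bit-serial loop above. A standalone 64-bit
// sketch:
//
//     uint64_t clmul64(uint64_t a, uint64_t b)
//     {
//         uint64_t r = 0;
//         for (int i = 0; i < 64; i++)
//             if ((b >> i) & 1)
//                 r ^= a << i;
//         return r;
//     }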
|
|
0x14: bset({{
|
|
Rs2 &= rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 | (UINT64_C(1) << Rs2));
|
|
}});
|
|
0x24: bclr({{
|
|
Rs2 &= rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 & (~(UINT64_C(1) << Rs2)));
|
|
}});
|
|
0x30: rol({{
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
int shamt = Rs2 & (xlen - 1);
|
|
Rd = rvSext((Rs1 << shamt)
|
|
| (rvZext(Rs1) >> ((xlen - shamt) & (xlen - 1))));
|
|
}});
|
|
0x34: binv({{
|
|
Rs2 &= rvSelect(32 - 1, 64 - 1);
|
|
Rd = rvSext(Rs1 ^ (UINT64_C(1) << Rs2));
|
|
}});
|
|
}
|
|
0x2: decode FUNCT7 {
|
|
0x0: slt({{
|
|
Rd = (rvSext(Rs1_sd) < rvSext(Rs2_sd)) ? 1 : 0;
|
|
}});
|
|
0x1: mulhsu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = mulhsu<int32_t>(Rs1_sd, Rs2);
|
|
} else {
|
|
Rd_sd = mulhsu<int64_t>(Rs1_sd, Rs2);
|
|
}
|
|
}}, IntMultOp);
|
|
0x5: clmulr({{
|
|
uint64_t result = 0;
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
uint64_t zextRs1 = rvZext(Rs1);
|
|
for (int i = 0; i < xlen; i++) {
|
|
if ((Rs2 >> i) & 1) {
|
|
result ^= zextRs1 >> (xlen-i-1);
|
|
}
|
|
}
|
|
Rd = rvSext(result);
|
|
}});
|
|
0x10: sh1add({{
|
|
Rd = rvSext((Rs1 << 1) + Rs2);
|
|
}});
|
|
0x14: xperm4({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_xperm4_32(Rs1_sd, Rs2_sd);
|
|
} else {
|
|
Rd_sd = _rvk_emu_xperm4_64(Rs1_sd, Rs2_sd);
|
|
}
|
|
}});
|
|
}
|
|
0x3: decode FUNCT7 {
|
|
0x0: sltu({{
|
|
Rd = (rvZext(Rs1) < rvZext(Rs2)) ? 1 : 0;
|
|
}});
|
|
0x1: mulhu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd = (int32_t)mulhu<uint32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd = mulhu<uint64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntMultOp);
|
|
0x5: clmulh({{
|
|
uint64_t result = 0;
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
uint64_t zextRs1 = rvZext(Rs1);
|
|
for (int i = 1; i < xlen; i++) {
|
|
if ((Rs2 >> i) & 1) {
|
|
result ^= zextRs1 >> (xlen-i);
|
|
}
|
|
}
|
|
// The MSB can never be 1, no need to sign extend.
|
|
Rd = result;
|
|
}});
|
|
}
|
|
0x4: decode FUNCT7 {
|
|
0x0: xor({{
|
|
Rd = rvSext(Rs1 ^ Rs2);
|
|
}});
|
|
0x1: div({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = div<int32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd_sd = div<int64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x4: pack({{
|
|
int xlen = rvSelect(32, 64);
|
|
Rd = rvSext(
|
|
(bits(Rs2, xlen/2-1, 0) << (xlen / 2)) | \
|
|
bits(Rs1, xlen/2-1, 0)
|
|
);
|
|
}});
|
|
0x5: min({{
|
|
Rd_sd = std::min(rvSext(Rs1_sd), rvSext(Rs2_sd));
|
|
}});
|
|
0x10: sh2add({{
|
|
Rd = rvSext((Rs1 << 2) + Rs2);
|
|
}});
|
|
0x14: xperm8({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = _rvk_emu_xperm8_32(Rs1_sd, Rs2_sd);
|
|
} else {
|
|
Rd_sd = _rvk_emu_xperm8_64(Rs1_sd, Rs2_sd);
|
|
}
|
|
}});
|
|
0x20: xnor({{
|
|
Rd = rvSext(~(Rs1 ^ Rs2));
|
|
}});
|
|
}
|
|
0x5: decode FUNCT7 {
|
|
0x0: srl({{
|
|
Rd = rvSext(rvZext(Rs1) >>
|
|
rvSelect(Rs2<4:0>, Rs2<5:0>));
|
|
}});
|
|
0x1: divu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd = (int32_t)divu<uint32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd = divu<uint64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x20: sra({{
|
|
Rd = rvSext(Rs1_sd) >> rvSelect(Rs2<4:0>, Rs2<5:0>);
|
|
}});
|
|
0x5: minu({{
|
|
Rd = rvSext(std::min(rvZext(Rs1), rvZext(Rs2)));
|
|
}});
|
|
0x24: bext({{
|
|
Rs2 &= (rvSelect(32, 64) - 1);
|
|
// No sign extension needed because the MSB is always 0.
|
|
Rd = (Rs1 >> Rs2) & 0x1;
|
|
}});
|
|
0x30: ror({{
|
|
uint64_t xlen = rvSelect(32, 64);
|
|
int shamt = Rs2 & (xlen - 1);
|
|
Rd = rvSext((rvZext(Rs1) >> shamt)
|
|
| (Rs1 << ((xlen - shamt) & (xlen - 1))));
|
|
}});
|
|
}
|
|
0x6: decode FUNCT7 {
|
|
0x0: or({{
|
|
Rd = rvSext(Rs1 | Rs2);
|
|
}});
|
|
0x1: rem({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd_sd = rem<int32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd_sd = rem<int64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x5: max({{
|
|
Rd_sd = std::max(rvSext(Rs1_sd), rvSext(Rs2_sd));
|
|
}});
|
|
0x10: sh3add({{
|
|
Rd = rvSext((Rs1 << 3) + Rs2);
|
|
}});
|
|
0x20: orn({{
|
|
Rd = rvSext(Rs1 | (~Rs2));
|
|
}});
|
|
}
|
|
0x7: decode FUNCT7 {
|
|
0x0: and({{
|
|
Rd = rvSext(Rs1 & Rs2);
|
|
}});
|
|
0x1: remu({{
|
|
if (machInst.rv_type == RV32) {
|
|
Rd = (int32_t)remu<uint32_t>(Rs1, Rs2);
|
|
} else {
|
|
Rd = remu<uint64_t>(Rs1, Rs2);
|
|
}
|
|
}}, IntDivOp);
|
|
0x4: packh({{
|
|
// No sign extension needed, as the MSB is always 0.
|
|
Rd = (Rs2_ub << 8) | Rs1_ub;
|
|
}});
|
|
0x5: maxu({{
|
|
Rd = rvSext(std::max(rvZext(Rs1), rvZext(Rs2)));
|
|
}});
|
|
0x20: andn({{
|
|
Rd = rvSext(Rs1 & (~Rs2));
|
|
}});
|
|
}
|
|
}
|
|
}
|
|
|
|
0x0d: UOp::lui({{
|
|
Rd = (sext<20>(imm) << 12);
|
|
}});
|
|
|
|
        0x0e: decode RVTYPE {
            0x1: decode FUNCT3 {
                format ROp {
                    0x0: decode FUNCT7 {
                        0x0: addw({{
                            Rd_sd = Rs1_sw + Rs2_sw;
                        }});
                        0x1: mulw({{
                            Rd_sd = (int32_t)(Rs1_sw * Rs2_sw);
                        }}, IntMultOp);
                        0x4: add_uw({{
                            Rd = Rs1_uw + Rs2;
                        }});
                        0x20: subw({{
                            Rd_sd = Rs1_sw - Rs2_sw;
                        }});
                    }
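                    // rolw/rorw (Zbb) rotate the low 32 bits of rs1 by
                    // rs2 mod 32 and sign-extend the 32-bit result; the
                    // sh[123]add.uw forms (Zba) add the zero-extended low
                    // word of rs1, scaled, to rs2 as a full 64-bit result.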
                    0x1: decode FUNCT7 {
                        0x0: sllw({{
                            Rd_sd = Rs1_sw << Rs2<4:0>;
                        }});
                        0x30: rolw({{
                            int shamt = Rs2 & (32 - 1);
                            Rd = (int32_t)((Rs1_uw << shamt) |
                                    (Rs1_uw >> ((32 - shamt) & (32 - 1))));
                        }});
                    }
                    0x2: decode FUNCT7 {
                        0x10: sh1add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 1) + Rs2;
                        }});
                    }
                    0x4: decode FUNCT7 {
                        0x1: divw({{
                            Rd_sd = div<int32_t>(Rs1, Rs2);
                        }}, IntDivOp);
                        0x4: packw({{
                            Rd_sd = sext<32>((Rs2_uh << 16) | Rs1_uh);
                        }});
                        0x10: sh2add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 2) + Rs2;
                        }});
                    }
                    0x5: decode FUNCT7 {
                        0x0: srlw({{
                            Rd_sd = (int32_t)(Rs1_uw >> Rs2<4:0>);
                        }});
                        0x1: divuw({{
                            Rd = sext<32>(divu<uint32_t>(Rs1, Rs2));
                        }}, IntDivOp);
                        0x20: sraw({{
                            Rd_sd = Rs1_sw >> Rs2<4:0>;
                        }});
                        0x30: rorw({{
                            int shamt = Rs2 & (32 - 1);
                            Rd = (int32_t)((Rs1_uw >> shamt) |
                                    (Rs1_uw << ((32 - shamt) & (32 - 1))));
                        }});
                    }
                    0x6: decode FUNCT7 {
                        0x1: remw({{
                            Rd_sd = rem<int32_t>(Rs1, Rs2);
                        }}, IntDivOp);
                        0x10: sh3add_uw({{
                            Rd = (((uint64_t)Rs1_uw) << 3) + Rs2;
                        }});
                    }
                    0x7: remuw({{
                        Rd = sext<32>(remu<uint32_t>(Rs1, Rs2));
                    }}, IntDivOp);
                }
            }
        }

format FPROp {
|
|
0x10: decode FUNCT2 {
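                // The FP instructions below use the softfloat element types
                // (f16/f32/f64): freg() converts between the raw, NaN-boxed
                // bits held in Fd_bits/Fs*_bits and the softfloat value
                // types, and RM_REQUIRED validates the rounding-mode field
                // (the dynamic mode comes from the frm CSR).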
|
|
0x0: fmadd_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fmadd_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fmadd_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
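            // fmsub/fnmsub/fnmadd below reuse f*_mulAdd and negate the
            // relevant operand(s) by XORing the sign bit: mask(31, 31) for
            // single, mask(63, 63) for double, mask(15, 15) for half
            // precision.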
|
|
0x11: decode FUNCT2 {
|
|
0x0: fmsub_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(f32(freg(Fs3_bits)).v ^
|
|
mask(31, 31))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fmsub_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(f64(freg(Fs3_bits)).v ^
|
|
mask(63, 63))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fmsub_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(f16(freg(Fs3_bits)).v ^
|
|
mask(15, 15))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
|
|
0x12: decode FUNCT2 {
|
|
0x0: fnmsub_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(f32(freg(Fs1_bits)).v ^
|
|
mask(31, 31)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fnmsub_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(f64(freg(Fs1_bits)).v ^
|
|
mask(63, 63)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fnmsub_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(f16(freg(Fs1_bits)).v ^
|
|
mask(15, 15)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(freg(Fs3_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
|
|
0x13: decode FUNCT2 {
|
|
0x0: fnmadd_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mulAdd(f32(f32(freg(Fs1_bits)).v ^
|
|
mask(31, 31)),
|
|
f32(freg(Fs2_bits)),
|
|
f32(f32(freg(Fs3_bits)).v ^
|
|
mask(31, 31))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x1: fnmadd_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mulAdd(f64(f64(freg(Fs1_bits)).v ^
|
|
mask(63, 63)),
|
|
f64(freg(Fs2_bits)),
|
|
f64(f64(freg(Fs3_bits)).v ^
|
|
mask(63, 63))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
0x2: fnmadd_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mulAdd(f16(f16(freg(Fs1_bits)).v ^
|
|
mask(15, 15)),
|
|
f16(freg(Fs2_bits)),
|
|
f16(f16(freg(Fs3_bits)).v ^
|
|
mask(15, 15))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultAccOp);
|
|
}
|
|
0x14: decode FUNCT7 {
|
|
0x0: fadd_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_add(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x1: fadd_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_add(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x2: fadd_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_add(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x4: fsub_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_sub(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x5: fsub_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_sub(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x6: fsub_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_sub(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatAddOp);
|
|
0x8: fmul_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_mul(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultOp);
|
|
0x9: fmul_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_mul(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultOp);
|
|
0xa: fmul_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_mul(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatMultOp);
|
|
0xc: fdiv_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_div(f32(freg(Fs1_bits)),
|
|
f32(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatDivOp);
|
|
0xd: fdiv_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_div(f64(freg(Fs1_bits)),
|
|
f64(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatDivOp);
|
|
0xe: fdiv_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_div(f16(freg(Fs1_bits)),
|
|
f16(freg(Fs2_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatDivOp);
|
|
0x10: decode ROUND_MODE {
|
|
0x0: fsgnj_s({{
|
|
auto sign = bits(unboxF32(Fs2_bits), 31);
|
|
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x1: fsgnjn_s({{
|
|
auto sign = ~bits(unboxF32(Fs2_bits), 31);
|
|
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x2: fsgnjx_s({{
|
|
auto sign = bits(
|
|
unboxF32(Fs1_bits) ^ unboxF32(Fs2_bits), 31);
|
|
Fd_bits = boxF32(insertBits(unboxF32(Fs1_bits), 31,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
}
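                // Sign-injection: the result is rs1's magnitude with the
                // sign taken from rs2 (fsgnj), its inverse (fsgnjn), or the
                // XOR of both signs (fsgnjx); fsgnj rd,rs,rs is the
                // canonical fmv. unboxF32/boxF32 handle the NaN-boxing of
                // 32-bit values inside the 64-bit FP registers.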
|
|
0x11: decode ROUND_MODE {
|
|
0x0: fsgnj_d({{
|
|
Fd_bits = insertBits(Fs2_bits, 62, 0, Fs1_bits);
|
|
}}, FloatMiscOp);
|
|
0x1: fsgnjn_d({{
|
|
Fd_bits = insertBits(~Fs2_bits, 62, 0, Fs1_bits);
|
|
}}, FloatMiscOp);
|
|
0x2: fsgnjx_d({{
|
|
Fd_bits = insertBits(
|
|
Fs1_bits ^ Fs2_bits, 62, 0, Fs1_bits);
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x12: decode ROUND_MODE {
|
|
0x0: fsgnj_h({{
|
|
auto sign = bits(unboxF16(Fs2_bits), 15);
|
|
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x1: fsgnjn_h({{
|
|
auto sign = ~bits(unboxF16(Fs2_bits), 15);
|
|
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
0x2: fsgnjx_h({{
|
|
auto sign = bits(
|
|
unboxF16(Fs1_bits) ^ unboxF16(Fs2_bits), 15);
|
|
Fd_bits = boxF16(insertBits(unboxF16(Fs1_bits), 15,
|
|
sign));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x14: decode ROUND_MODE {
|
|
0x0: fmin_s({{
|
|
float32_t fs1 = f32(freg(Fs1_bits));
|
|
float32_t fs2 = f32(freg(Fs2_bits));
|
|
float32_t fd;
|
|
bool less = f32_lt_quiet(fs1, fs2) ||
|
|
(f32_eq(fs1, fs2) && bits(fs1.v, 31));
|
|
|
|
fd = less || isNaNF32UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
|
|
fd = f32(defaultNaNF32UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
0x1: fmax_s({{
|
|
float32_t fs1 = f32(freg(Fs1_bits));
|
|
float32_t fs2 = f32(freg(Fs2_bits));
|
|
float32_t fd;
|
|
bool greater = f32_lt_quiet(fs2, fs1) ||
|
|
(f32_eq(fs2, fs1) && bits(fs2.v, 31));
|
|
|
|
                        fd = greater || isNaNF32UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF32UI(fs1.v) && isNaNF32UI(fs2.v))
|
|
fd = f32(defaultNaNF32UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
}
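                // fmin/fmax: if exactly one operand is NaN the other operand
                // is returned, if both are NaN the canonical NaN is
                // returned, and -0.0 is treated as less than +0.0 (hence the
                // explicit sign-bit check).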
|
|
0x15: decode ROUND_MODE {
|
|
0x0: fmin_d({{
|
|
float64_t fs1 = f64(freg(Fs1_bits));
|
|
float64_t fs2 = f64(freg(Fs2_bits));
|
|
float64_t fd;
|
|
bool less = f64_lt_quiet(fs1, fs2) ||
|
|
(f64_eq(fs1, fs2) && bits(fs1.v, 63));
|
|
|
|
fd = less || isNaNF64UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
|
|
fd = f64(defaultNaNF64UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
0x1: fmax_d({{
|
|
float64_t fs1 = f64(freg(Fs1_bits));
|
|
float64_t fs2 = f64(freg(Fs2_bits));
|
|
float64_t fd;
|
|
bool greater = f64_lt_quiet(fs2, fs1) ||
|
|
(f64_eq(fs2, fs1) && bits(fs2.v, 63));
|
|
|
|
fd = greater || isNaNF64UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF64UI(fs1.v) && isNaNF64UI(fs2.v))
|
|
fd = f64(defaultNaNF64UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x16: decode ROUND_MODE {
|
|
0x0: fmin_h({{
|
|
float16_t fs1 = f16(freg(Fs1_bits));
|
|
float16_t fs2 = f16(freg(Fs2_bits));
|
|
float16_t fd;
|
|
bool less = f16_lt_quiet(fs1, fs2) ||
|
|
(f16_eq(fs1, fs2) && bits(fs1.v, 15));
|
|
|
|
fd = less || isNaNF16UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
|
|
fd = f16(defaultNaNF16UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
0x1: fmax_h({{
|
|
float16_t fs1 = f16(freg(Fs1_bits));
|
|
float16_t fs2 = f16(freg(Fs2_bits));
|
|
float16_t fd;
|
|
bool greater = f16_lt_quiet(fs2, fs1) ||
|
|
(f16_eq(fs2, fs1) && bits(fs2.v, 15));
|
|
|
|
fd = greater || isNaNF16UI(fs2.v) ? fs1 : fs2;
|
|
if (isNaNF16UI(fs1.v) && isNaNF16UI(fs2.v))
|
|
fd = f16(defaultNaNF16UI);
|
|
Fd_bits = freg(fd).v;
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x20: decode CONV_SGN {
|
|
0x1: fcvt_s_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_to_f32(f64(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: fcvt_s_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_to_f32(f16(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x21: decode CONV_SGN {
|
|
0x0: fcvt_d_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_to_f64(f32(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: fcvt_d_h({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f16_to_f64(f16(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x22: decode CONV_SGN {
|
|
0x0: fcvt_h_s({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f32_to_f16(f32(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_h_d({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(f64_to_f16(f64(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
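                // For the fsqrt encodings the rs2 field is part of the
                // opcode and must be zero; a non-zero field decodes as an
                // illegal instruction below.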
|
|
0x2c: fsqrt_s({{
|
|
if (RS2 != 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x1", machInst);
|
|
}
|
|
freg_t fd;
|
|
RM_REQUIRED;
|
|
fd = freg(f32_sqrt(f32(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatSqrtOp);
|
|
0x2d: fsqrt_d({{
|
|
if (RS2 != 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x1", machInst);
|
|
}
|
|
freg_t fd;
|
|
RM_REQUIRED;
|
|
fd = freg(f64_sqrt(f64(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatSqrtOp);
|
|
0x2e: fsqrt_h({{
|
|
if (RS2 != 0) {
|
|
return std::make_shared<IllegalInstFault>(
|
|
"source reg x1", machInst);
|
|
}
|
|
freg_t fd;
|
|
RM_REQUIRED;
|
|
fd = freg(f16_sqrt(f16(freg(Fs1_bits))));
|
|
Fd_bits = fd.v;
|
|
}}, FloatSqrtOp);
|
|
0x50: decode ROUND_MODE {
|
|
0x0: fle_s({{
|
|
Rd = f32_le(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x1: flt_s({{
|
|
Rd = f32_lt(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x2: feq_s({{
|
|
Rd = f32_eq(f32(freg(Fs1_bits)), f32(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x51: decode ROUND_MODE {
|
|
0x0: fle_d({{
|
|
Rd = f64_le(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x1: flt_d({{
|
|
Rd = f64_lt(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x2: feq_d({{
|
|
Rd = f64_eq(f64(freg(Fs1_bits)), f64(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
}
|
|
0x52: decode ROUND_MODE {
|
|
0x0: fle_h({{
|
|
Rd = f16_le(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x1: flt_h({{
|
|
Rd = f16_lt(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
0x2: feq_h({{
|
|
Rd = f16_eq(f16(freg(Fs1_bits)), f16(freg(Fs2_bits)));
|
|
}}, FloatCmpOp);
|
|
}
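                // FP-to-integer conversions below: RM_REQUIRED validates the
                // rounding mode, 32-bit results (including the unsigned
                // fcvt.wu forms) are sign-extended into the destination
                // register, and the 64-bit L/LU variants are only available
                // on RV64 (hence the RVTYPE decode).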
|
|
0x60: decode CONV_SGN {
|
|
0x0: fcvt_w_s({{
|
|
RM_REQUIRED;
|
|
Rd_sd = sext<32>(f32_to_i32(f32(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_wu_s({{
|
|
RM_REQUIRED;
|
|
Rd = sext<32>(f32_to_ui32(f32(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_l_s({{
|
|
RM_REQUIRED;
|
|
Rd_sd = f32_to_i64(f32(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_lu_s({{
|
|
RM_REQUIRED;
|
|
Rd = f32_to_ui64(f32(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x61: decode CONV_SGN {
|
|
0x0: fcvt_w_d({{
|
|
RM_REQUIRED;
|
|
Rd_sd = sext<32>(f64_to_i32(f64(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_wu_d({{
|
|
RM_REQUIRED;
|
|
Rd = sext<32>(f64_to_ui32(f64(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_l_d({{
|
|
RM_REQUIRED;
|
|
Rd_sd = f64_to_i64(f64(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_lu_d({{
|
|
RM_REQUIRED;
|
|
Rd = f64_to_ui64(f64(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x62: decode CONV_SGN {
|
|
0x0: fcvt_w_h({{
|
|
RM_REQUIRED;
|
|
Rd_sd = sext<32>(f16_to_i32(f16(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_wu_h({{
|
|
RM_REQUIRED;
|
|
Rd = sext<32>(f16_to_ui32(f16(freg(Fs1_bits)), rm,
|
|
true));
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_l_h({{
|
|
RM_REQUIRED;
|
|
Rd_sd = f16_to_i64(f16(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_lu_h({{
|
|
RM_REQUIRED;
|
|
Rd = f16_to_ui64(f16(freg(Fs1_bits)), rm, true);
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x68: decode CONV_SGN {
|
|
0x0: fcvt_s_w({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i32_to_f32(Rs1_sw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_s_wu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui32_to_f32(Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_s_l({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i64_to_f32(Rs1_ud));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_s_lu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui64_to_f32(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x69: decode CONV_SGN {
|
|
0x0: fcvt_d_w({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i32_to_f64(Rs1_sw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_d_wu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui32_to_f64(Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_d_l({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i64_to_f64(Rs1_sd));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_d_lu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui64_to_f64(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x6a: decode CONV_SGN {
|
|
0x0: fcvt_h_w({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i32_to_f16((int32_t)Rs1_sw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x1: fcvt_h_wu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui32_to_f16((uint32_t)Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x2: decode RVTYPE {
|
|
0x1: fcvt_h_l({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(i64_to_f16(Rs1_ud));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x3: decode RVTYPE {
|
|
0x1: fcvt_h_lu({{
|
|
RM_REQUIRED;
|
|
freg_t fd;
|
|
fd = freg(ui64_to_f16(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
0x70: decode ROUND_MODE {
|
|
0x0: fmv_x_w({{
|
|
uint64_t result = (uint32_t)Fs1_bits;
|
|
if ((result&0x80000000) != 0) {
|
|
result |= (0xFFFFFFFFULL << 32);
|
|
}
|
|
Rd = result;
|
|
}}, FloatCvtOp);
|
|
0x1: fclass_s({{
|
|
Rd = rvSext(f32_classify(f32(freg(Fs1_bits))));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x71: decode ROUND_MODE {
|
|
0x0: decode RVTYPE {
|
|
0x1: fmv_x_d({{
|
|
Rd = freg(Fs1_bits).v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x1: fclass_d({{
|
|
Rd = f64_classify(f64(freg(Fs1_bits)));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x72: decode ROUND_MODE {
|
|
0x0: fmv_x_h({{
|
|
uint64_t result = (uint16_t)Fs1_bits;
|
|
if ((result&0x8000) != 0) {
|
|
result |= (0xFFFFFFFFFFFFULL << 16);
|
|
}
|
|
Rd = result;
|
|
}}, FloatCvtOp);
|
|
0x1: fclass_h({{
|
|
Rd = f16_classify(f16(freg(Fs1_bits)));
|
|
}}, FloatMiscOp);
|
|
}
|
|
0x78: fmv_w_x({{
|
|
freg_t fd;
|
|
fd = freg(f32(Rs1_uw));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
0x79: decode RVTYPE {
|
|
0x1: fmv_d_x({{
|
|
freg_t fd;
|
|
fd = freg(f64(Rs1));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
0x7a: fmv_h_x({{
|
|
freg_t fd;
|
|
fd = freg(f16(Rs1_uh));
|
|
Fd_bits = fd.v;
|
|
}}, FloatCvtOp);
|
|
}
|
|
}
|
|
|
|
0x15: decode FUNCT3 {
|
|
// OPIVV
|
|
0x0: decode VFUNCT6 {
|
|
format VectorIntFormat {
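                    // In these bodies (as generated by VectorIntFormat), i
                    // is the element index within the current micro-op, ei
                    // the global element index used for the v0 mask, and
                    // vu/vi the unsigned/signed element types for the
                    // current SEW.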
|
|
0x0: vadd_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] + Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2: vsub_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] - Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x4: vminu_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] < Vs1_vu[i] ?
|
|
Vs2_vu[i] : Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x5: vmin_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] < Vs1_vi[i] ?
|
|
Vs2_vi[i] : Vs1_vi[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x6: vmaxu_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] > Vs1_vu[i] ?
|
|
Vs2_vu[i] : Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x7: vmax_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] > Vs1_vi[i] ?
|
|
Vs2_vi[i] : Vs1_vi[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x9: vand_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] & Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0xa: vor_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] | Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0xb: vxor_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] ^ Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
0x0c: VectorGatherFormat::vrgather_vv({{
|
|
for (uint32_t i = 0; i < microVl; i++) {
|
|
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
|
|
if (this->vm || elem_mask(v0, ei)) {
|
|
const uint64_t idx = Vs1_vu[i]
|
|
- vs2_elems * vs2_idx;
|
|
auto res = (Vs1_vu[i] >= vlmax) ? 0
|
|
: (idx < vs2_elems) ? Vs2_vu[idx]
|
|
: Vs3_vu[i];
|
|
Vd_vu[i] = res;
|
|
}
|
|
}
|
|
}}, OPIVV, VectorMiscOp);
|
|
0x0e: VectorGatherFormat::vrgatherei16_vv({{
|
|
for (uint32_t i = 0; i < microVl; i++) {
|
|
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
|
|
if (this->vm || elem_mask(v0, ei)) {
|
|
const uint32_t idx = Vs1_uh[i + vs1_bias]
|
|
- vs2_elems * vs2_idx;
|
|
auto res = (Vs1_uh[i + vs1_bias] >= vlmax) ? 0
|
|
: (idx < vs2_elems) ? Vs2_vu[idx]
|
|
: Vs3_vu[i + vd_bias];
|
|
Vd_vu[i + vd_bias] = res;
|
|
}
|
|
}
|
|
}}, OPIVV, VectorMiscOp);
|
|
format VectorIntFormat {
|
|
0x10: decode VM {
|
|
0x0: vadc_vvm({{
|
|
Vd_vi[i] = Vs2_vi[i] + Vs1_vi[i]
|
|
+ elem_mask(v0, ei);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
// the unmasked versions (vm=1) are reserved
|
|
}
|
|
0x12: decode VM {
|
|
0x0: vsbc_vvm({{
|
|
Vd_vi[i] = Vs2_vi[i] - Vs1_vi[i]
|
|
- elem_mask(v0, ei);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
// the unmasked versions (vm=1) are reserved
|
|
}
|
|
0x17: decode VM {
|
|
0x0: vmerge_vvm({{
|
|
Vd_vu[i] = elem_mask(v0, ei)
|
|
? Vs1_vu[i]
|
|
: Vs2_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1: decode VS2 {
|
|
0x0: vmv_v_v({{
|
|
Vd_vu[i] = Vs1_vu[i];
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
}
|
|
format VectorIntVxsatFormat{
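                    // Saturating fixed-point ops: the sat_* helpers clamp
                    // the result and report saturation through vxsatptr,
                    // which is used to update the vxsat CSR.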
|
|
0x20: vsaddu_vv({{
|
|
Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x21: vsadd_vv({{
|
|
Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x22: vssubu_vv({{
|
|
Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x23: vssub_vv({{
|
|
Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Vs1_vu[i],
|
|
vxsatptr);
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x27: vsmul_vv({{
|
|
vi max = std::numeric_limits<vi>::max();
|
|
vi min = std::numeric_limits<vi>::min();
|
|
bool overflow = Vs1_vi[i] == Vs2_vi[i] &&
|
|
Vs1_vi[i] == min;
|
|
__int128_t result = (__int128_t)Vs1_vi[i] *
|
|
(__int128_t)Vs2_vi[i];
|
|
result = int_rounding<__int128_t>(
|
|
result, 0 /* TODO */, sew - 1);
|
|
result = result >> (sew - 1);
|
|
if (overflow) {
|
|
result = max;
|
|
*vxsatptr = true;
|
|
}
|
|
|
|
Vd_vi[i] = (vi)result;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x25: vsll_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] << (Vs1_vu[i] & (sew - 1));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x28: vsrl_vv({{
|
|
Vd_vu[i] = Vs2_vu[i] >> (Vs1_vu[i] & (sew - 1));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x29: vsra_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] >> (Vs1_vu[i] & (sew - 1));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2a: vssrl_vv({{
|
|
int sh = Vs1_vu[i] & (sew - 1);
|
|
__uint128_t val = Vs2_vu[i];
|
|
|
|
val = int_rounding<__uint128_t>(val,
|
|
xc->readMiscReg(MISCREG_VXRM), sh);
|
|
Vd_vu[i] = val >> sh;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2b: vssra_vv({{
|
|
int sh = Vs1_vi[i] & (sew - 1);
|
|
__int128_t val = Vs2_vi[i];
|
|
|
|
val = int_rounding<__int128_t>(val,
|
|
xc->readMiscReg(MISCREG_VXRM), sh);
|
|
Vd_vi[i] = val >> sh;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorReduceIntWideningFormat {
|
|
0x30: vwredsumu_vs({{
|
|
Vd_vwu[0] = reduce_loop(std::plus<vwu>(),
|
|
Vs1_vwu, Vs2_vu);
|
|
}}, OPIVV, VectorIntegerReduceOp);
|
|
0x31: vwredsum_vs({{
|
|
Vd_vwu[0] = reduce_loop(std::plus<vwi>(),
|
|
Vs1_vwi, Vs2_vi);
|
|
}}, OPIVV, VectorIntegerReduceOp);
|
|
}
|
|
format VectorIntMaskFormat {
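                    // Mask-producing ops (compares, carry/borrow-out): each
                    // element yields a single bit, written into bit
                    // (i + offset) of vd via ASSIGN_VD_BIT.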
|
|
0x11: decode VM {
|
|
0x0: vmadc_vvm({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vu[i], Vs1_vu[i],
|
|
elem_mask(v0, ei)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1: vmadc_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vu[i], Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
0x13: decode VM {
|
|
0x0: vmsbc_vvm({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
borrow_out(Vs2_vi[i], Vs1_vi[i],
|
|
elem_mask(v0, ei)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1: vmsbc_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
borrow_out(Vs2_vi[i], Vs1_vi[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
0x18: vmseq_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] == Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x19: vmsne_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] != Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1a: vmsltu_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] < Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1b: vmslt_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] < Vs1_vi[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1c: vmsleu_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] <= Vs1_vu[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x1d: vmsle_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] <= Vs1_vi[i]));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntNarrowingFormat {
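                    // Narrowing ops read 2*SEW-wide source elements (the
                    // vw* types) and write SEW-wide results; vnclip/vnclipu
                    // additionally round and saturate the shifted value.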
|
|
0x2c: vnsrl_wv({{
|
|
Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
|
|
((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2d: vnsra_wv({{
|
|
Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
|
|
((vwu)Vs1_vu[i + offset] & (sew * 2 - 1)));
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2e: vnclipu_wv({{
|
|
vu max = std::numeric_limits<vu>::max();
|
|
uint64_t sign_mask =
|
|
std::numeric_limits<uint64_t>::max() << sew;
|
|
__uint128_t res = Vs2_vwu[i];
|
|
unsigned shift = Vs1_vu[i + offset] & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__uint128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res & sign_mask) {
|
|
res = max;
|
|
// TODO: vxsat
|
|
}
|
|
|
|
Vd_vu[i + offset] = (vu)res;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
0x2f: vnclip_wv({{
|
|
vi max = std::numeric_limits<vi>::max();
|
|
vi min = std::numeric_limits<vi>::min();
|
|
__int128_t res = Vs2_vwi[i];
|
|
unsigned shift = Vs1_vi[i + offset] & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__int128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res < min) {
|
|
res = min;
|
|
// TODO: vxsat
|
|
} else if (res > max) {
|
|
res = max;
|
|
// TODO: vxsat
|
|
}
|
|
|
|
Vd_vi[i + offset] = (vi)res;
|
|
}}, OPIVV, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
// OPFVV
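            // In the vector FP bodies below, et is the softfloat element
            // type selected by SEW (16/32/64-bit) and ftype<et>() wraps the
            // raw element bits for the softfloat-based f* helpers.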
|
|
0x1: decode VFUNCT6 {
|
|
0x00: VectorFloatFormat::vfadd_vv({{
|
|
auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x01: VectorReduceFloatFormat::vfredusum_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fadd<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x02: VectorFloatFormat::vfsub_vv({{
|
|
auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x03: VectorReduceFloatFormat::vfredosum_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fadd<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x04: VectorFloatFormat::vfmin_vv({{
|
|
auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x05: VectorReduceFloatFormat::vfredmin_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fmin<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x06: VectorFloatFormat::vfmax_vv({{
|
|
auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x07: VectorReduceFloatFormat::vfredmax_vs({{
|
|
Vd_vu[0] = reduce_loop([](const vu& src1, const vu& src2) {
|
|
return fmax<et>(ftype<et>(src1), ftype<et>(src2));
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x08: VectorFloatFormat::vfsgnj_vv({{
|
|
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
false, false).v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x09: VectorFloatFormat::vfsgnjn_vv({{
|
|
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
true, false).v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x0a: VectorFloatFormat::vfsgnjx_vv({{
|
|
Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
false, true).v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
// VWFUNARY0
|
|
0x10: decode VS1 {
|
|
0x00: decode VM {
|
|
// The encodings corresponding to the masked versions
|
|
// (vm=0) of vfmv.f.s are reserved
|
|
0x1: VectorNonSplitFormat::vfmv_f_s({{
|
|
freg_t fd = freg(Vs2_vu[0]);
|
|
Fd_bits = fd.v;
|
|
}}, OPFVV, VectorMiscOp);
|
|
}
|
|
}
|
|
0x12: decode VS1 {
|
|
format VectorFloatCvtFormat {
|
|
0x00: vfcvt_xu_f_v({{
|
|
Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x01: vfcvt_x_f_v({{
|
|
Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x02: vfcvt_f_xu_v({{
|
|
auto fd = ui_to_f<et>(Vs2_vu[i]);
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x03: vfcvt_f_x_v({{
|
|
auto fd = i_to_f<et>(Vs2_vu[i]);
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x06: vfcvt_rtz_xu_f_v({{
|
|
Vd_vu[i] = f_to_ui<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x07: vfcvt_rtz_x_f_v({{
|
|
Vd_vu[i] = f_to_i<et>(ftype<et>(Vs2_vu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
}
|
|
format VectorFloatWideningCvtFormat {
|
|
0x08: vfwcvt_xu_f_v({{
|
|
Vd_vwu[i] = f_to_wui<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x09: vfwcvt_x_f_v({{
|
|
Vd_vwu[i] = f_to_wi<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0a: vfwcvt_f_xu_v({{
|
|
auto fd = ui_to_wf<vu>(Vs2_vu[i + offset]);
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0b: vfwcvt_f_x_v({{
|
|
auto fd = i_to_wf<vu>(Vs2_vu[i + offset]);
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0c: vfwcvt_f_f_v({{
|
|
auto fd = f_to_wf<et>(
|
|
ftype<et>(Vs2_vu[i + offset]));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0e: vfwcvt_rtz_xu_f_v({{
|
|
Vd_vwu[i] = f_to_wui<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x0f: vfwcvt_rtz_x_f_v({{
|
|
Vd_vwu[i] = f_to_wi<et>(
|
|
ftype<et>(Vs2_vu[i + offset]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
}
|
|
format VectorFloatNarrowingCvtFormat {
|
|
0x10: vfncvt_xu_f_w({{
|
|
Vd_vu[i + offset] = f_to_nui<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x11: vfncvt_x_f_w({{
|
|
Vd_vu[i + offset] = f_to_ni<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_roundingMode);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x12: vfncvt_f_xu_w({{
|
|
auto fd = ui_to_nf<et>(Vs2_vwu[i]);
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x13: vfncvt_f_x_w({{
|
|
auto fd = i_to_nf<et>(Vs2_vwu[i]);
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x14: vfncvt_f_f_w({{
|
|
auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x15: vfncvt_rod_f_f_w({{
|
|
softfloat_roundingMode = softfloat_round_odd;
|
|
auto fd = f_to_nf<et>(ftype<ewt>(Vs2_vwu[i]));
|
|
Vd_vu[i + offset] = fd.v;
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x16: vfncvt_rtz_xu_f_w({{
|
|
Vd_vu[i + offset] = f_to_nui<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
0x17: vfncvt_rtz_x_f_w({{
|
|
Vd_vu[i + offset] = f_to_ni<vu>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
softfloat_round_minMag);
|
|
}}, OPFVV, VectorFloatConvertOp);
|
|
}
|
|
}
|
|
0x13: decode VS1 {
|
|
format VectorFloatCvtFormat {
|
|
0x00: vfsqrt_v({{
|
|
auto fd = fsqrt<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x04: vfrsqrt7_v({{
|
|
auto fd = frsqrte7<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x05: vfrec7_v({{
|
|
auto fd = frecip7<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x10: vfclass_v({{
|
|
auto fd = fclassify<et>(ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
}
|
|
}
|
|
|
|
format VectorFloatMaskFormat {
|
|
0x18: vmfeq_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
feq<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x19: vmfle_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
fle<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x1b: vmflt_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
flt<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x1c: vmfne_vv({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
!feq<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i])));
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
}
|
|
format VectorFloatFormat {
|
|
0x20: vfdiv_vv({{
|
|
auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x24: vfmul_vv({{
|
|
auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs1_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x28: vfmadd_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x29: vfnmadd_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
|
|
ftype<et>(Vs1_vu[i]),
|
|
fneg(ftype<et>(Vs2_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2a: vfmsub_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
|
|
ftype<et>(Vs1_vu[i]),
|
|
fneg(ftype<et>(Vs2_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2b: vfnmsub_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
|
|
ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2c: vfmacc_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs3_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2d: vfnmacc_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
|
|
ftype<et>(Vs2_vu[i]),
|
|
fneg(ftype<et>(Vs3_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2e: vfmsac_vv({{
|
|
auto fd = fmadd<et>(ftype<et>(Vs1_vu[i]),
|
|
ftype<et>(Vs2_vu[i]),
|
|
fneg(ftype<et>(Vs3_vu[i])));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x2f: vfnmsac_vv({{
|
|
auto fd = fmadd<et>(fneg(ftype<et>(Vs1_vu[i])),
|
|
ftype<et>(Vs2_vu[i]),
|
|
ftype<et>(Vs3_vu[i]));
|
|
Vd_vu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x31: VectorReduceFloatWideningFormat::vfwredusum_vs({{
|
|
Vd_vwu[0] = reduce_loop(
|
|
[](const vwu& src1, const vu& src2) {
|
|
return fadd<ewt>(
|
|
ftype<ewt>(src1),
|
|
f_to_wf<et>(ftype<et>(src2))
|
|
);
|
|
}, Vs1_vwu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
0x33: VectorReduceFloatWideningFormat::vfwredosum_vs({{
|
|
Vd_vwu[0] = reduce_loop(
|
|
[](const vwu& src1, const vu& src2) {
|
|
return fadd<ewt>(
|
|
ftype<ewt>(src1),
|
|
f_to_wf<et>(ftype<et>(src2))
|
|
);
|
|
}, Vs1_vwu, Vs2_vu);
|
|
}}, OPFVV, VectorFloatReduceOp);
|
|
}
|
|
format VectorFloatWideningFormat {
|
|
0x30: vfwadd_vv({{
|
|
auto fd = fadd<ewt>(
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x32: vfwsub_vv({{
|
|
auto fd = fsub<ewt>(
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x34: vfwadd_wv({{
|
|
auto fd = fadd<ewt>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x36: vfwsub_wv({{
|
|
auto fd = fsub<ewt>(
|
|
ftype<ewt>(Vs2_vwu[i]),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x38: vfwmul_vv({{
|
|
auto fd = fmul<ewt>(
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3c: vfwmacc_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
ftype<ewt>(Vs3_vwu[i]));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3d: vfwnmacc_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fneg(ftype<ewt>(Vs3_vwu[i])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3e: vfwmsac_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(ftype<et>(Vs1_vu[i + offset])),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
fneg(ftype<ewt>(Vs3_vwu[i])));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
0x3f: vfwnmsac_vv({{
|
|
auto fd = fmadd<ewt>(
|
|
fwiden(fneg(ftype<et>(Vs1_vu[i + offset]))),
|
|
fwiden(ftype<et>(Vs2_vu[i + offset])),
|
|
ftype<ewt>(Vs3_vwu[i]));
|
|
Vd_vwu[i] = fd.v;
|
|
}}, OPFVV, VectorFloatArithOp);
|
|
}
|
|
}
|
|
// OPMVV
|
|
0x2: decode VFUNCT6 {
|
|
format VectorReduceIntFormat {
|
|
0x0: vredsum_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::plus<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x1: vredand_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::bit_and<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x2: vredor_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::bit_or<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x3: vredxor_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop(std::bit_xor<vi>(), Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x4: vredminu_vs({{
|
|
Vd_vu[0] =
|
|
reduce_loop([](const vu& src1, const vu& src2) {
|
|
return std::min<vu>(src1, src2);
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x5: vredmin_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop([](const vi& src1, const vi& src2) {
|
|
return std::min<vi>(src1, src2);
|
|
}, Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x6: vredmaxu_vs({{
|
|
Vd_vu[0] =
|
|
reduce_loop([](const vu& src1, const vu& src2) {
|
|
return std::max<vu>(src1, src2);
|
|
}, Vs1_vu, Vs2_vu);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
0x7: vredmax_vs({{
|
|
Vd_vi[0] =
|
|
reduce_loop([](const vi& src1, const vi& src2) {
|
|
return std::max<vi>(src1, src2);
|
|
}, Vs1_vi, Vs2_vi);
|
|
}}, OPMVV, VectorIntegerReduceOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x8: vaaddu_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vu[i] + Vs1_vu[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vu[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x9: vaadd_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vi[i] + Vs1_vi[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vi[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0xa: vasubu_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vu[i] - Vs1_vu[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vu[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0xb: vasub_vv({{
|
|
__uint128_t res = (__uint128_t)Vs2_vi[i] - Vs1_vi[i];
|
|
res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
|
|
Vd_vi[i] = res >> 1;
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
}
|
|
// VWXUNARY0
|
|
0x10: decode VS1 {
|
|
0x00: decode VM {
|
|
// The encodings corresponding to the masked versions
|
|
// (vm=0) of vmv.x.s are reserved.
|
|
0x1: VectorNonSplitFormat::vmv_x_s({{
|
|
Rd_ud = Vs2_vi[0];
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
0x10: Vector1Vs1RdMaskFormat::vcpop_m({{
|
|
uint64_t popcount = 0;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if(this->vm){
|
|
popcount += vs2_lsb;
|
|
}else{
|
|
bool do_mask = elem_mask(v0, i);
|
|
popcount += (vs2_lsb && do_mask);
|
|
}
|
|
}
|
|
Rd_vu = popcount;
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x11: Vector1Vs1RdMaskFormat::vfirst_m({{
|
|
int64_t pos = -1;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
if(this->vm == 0){
|
|
if(elem_mask(v0, i)==0){
|
|
continue;
|
|
}
|
|
}
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (vs2_lsb) {
|
|
pos = i;
|
|
break;
|
|
}
|
|
}
|
|
Rd_vu = pos;
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
0x12: decode VS1 {
|
|
format VectorIntExtFormat {
|
|
0x02: vzext_vf8({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vu[i] = Vs2_vextu[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x03: vsext_vf8({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vi[i] = Vs2_vext[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x04: vzext_vf4({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vu[i] = Vs2_vextu[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x05: vsext_vf4({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vi[i] = Vs2_vext[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x06: vzext_vf2({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vu[i] = Vs2_vextu[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
0x07: vsext_vf2({{
|
|
auto offset = (vlen / SEW) * index;
|
|
|
|
Vd_vi[i] = Vs2_vext[i + offset];
|
|
}}, OPMVV, VectorIntegerExtensionOp);
|
|
}
|
|
}
|
|
0x14: decode VS1 {
|
|
0x01: Vector1Vs1VdMaskFormat::vmsbf_m({{
|
|
bool has_one = false;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (this->vm || elem_mask(v0, i)){
|
|
uint64_t res = 0;
|
|
if (!has_one && !vs2_lsb) {
|
|
res = 1;
|
|
} else if (!has_one && vs2_lsb) {
|
|
has_one = true;
|
|
}
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x02: Vector1Vs1VdMaskFormat::vmsof_m({{
|
|
bool has_one = false;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (this->vm || elem_mask(v0, i)){
|
|
uint64_t res = 0;
|
|
if (!has_one && vs2_lsb) {
|
|
has_one = true;
|
|
res = 1;
|
|
}
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x03: Vector1Vs1VdMaskFormat::vmsif_m({{
|
|
bool has_one = false;
|
|
for (uint32_t i = 0; i < (uint32_t)machInst.vl; i++) {
|
|
bool vs2_lsb = elem_mask(Vs2_vu, i);
|
|
if (this->vm || elem_mask(v0, i)){
|
|
uint64_t res = 0;
|
|
if (!has_one && !vs2_lsb) {
|
|
res = 1;
|
|
} else if (!has_one && vs2_lsb) {
|
|
has_one = true;
|
|
res = 1;
|
|
}
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i, res);
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
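                    // viota.m: each active element receives the count of
                    // set mask bits in vs2 below its index (an exclusive
                    // prefix sum); *cnt carries the running count across
                    // micro-ops.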
|
|
0x10: ViotaFormat::viota_m({{
|
|
RiscvISAInst::VecRegContainer tmp_s2;
|
|
xc->getRegOperand(this, 2,
|
|
&tmp_s2);
|
|
auto Vs2bit = tmp_s2.as<vu>();
|
|
for (uint32_t i = 0; i < this->microVl; i++) {
|
|
uint32_t ei = i +
|
|
vtype_VLMAX(vtype, vlen, true) *
|
|
this->microIdx;
|
|
bool vs2_lsb = elem_mask(Vs2bit, ei);
|
|
bool do_mask = elem_mask(v0, ei);
|
|
bool has_one = false;
|
|
                        if (this->vm || do_mask) {
|
|
if (vs2_lsb) {
|
|
has_one = true;
|
|
}
|
|
}
|
|
bool use_ori = (!this->vm) && !do_mask;
|
|
if(use_ori == false){
|
|
Vd_vu[i] = *cnt;
|
|
}
|
|
if (has_one) {
|
|
*cnt = *cnt+1;
|
|
}
|
|
}
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x11: VectorIntFormat::vid_v({{
|
|
Vd_vu[i] = ei;
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
format VectorMaskFormat {
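                    // Mask-register logical ops: one bit per element, read
                    // with elem_mask() and written with ASSIGN_VD_BIT. Note
                    // that vmandn/vmorn combine vs2 with the complement of
                    // vs1.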
|
|
0x18: vmandn_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) & !elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x19: vmand_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1a: vmor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1b: vmxor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1c: vmorn_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
elem_mask(Vs2_vu, i) | !elem_mask(Vs1_vu, i));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1d: vmnand_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
!(elem_mask(Vs2_vu, i) & elem_mask(Vs1_vu, i)));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1e: vmnor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
!(elem_mask(Vs2_vu, i) | elem_mask(Vs1_vu, i)));
|
|
}}, OPMVV, VectorMiscOp);
|
|
0x1f: vmxnor_mm({{
|
|
Vd_ub[i/8] = ASSIGN_VD_BIT(i,
|
|
!(elem_mask(Vs2_vu, i) ^ elem_mask(Vs1_vu, i)));
|
|
}}, OPMVV, VectorMiscOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x20: vdivu_vv({{
|
|
Vd_vu[i] = divu<vu>(Vs2_vu[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x21: vdiv_vv({{
|
|
Vd_vi[i] = div<vi>(Vs2_vi[i], Vs1_vi[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x22: vremu_vv({{
|
|
Vd_vu[i] = remu<vu>(Vs2_vu[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x23: vrem_vv({{
|
|
Vd_vi[i] = rem<vi>(Vs2_vi[i], Vs1_vi[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x24: vmulhu_vv({{
|
|
Vd_vu[i] = mulhu<vu>(Vs2_vu[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x25: vmul_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x26: vmulhsu_vv({{
|
|
Vd_vi[i] = mulhsu<vi>(Vs2_vi[i], Vs1_vu[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x27: vmulh_vv({{
|
|
Vd_vi[i] = mulh<vi>(Vs2_vi[i], Vs1_vi[i]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x29: vmadd_vv({{
|
|
Vd_vi[i] = Vs3_vi[i] * Vs1_vi[i] + Vs2_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x2b: vnmsub_vv({{
|
|
Vd_vi[i] = -(Vs3_vi[i] * Vs1_vi[i]) + Vs2_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x2d: vmacc_vv({{
|
|
Vd_vi[i] = Vs2_vi[i] * Vs1_vi[i] + Vs3_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x2f: vnmsac_vv({{
|
|
Vd_vi[i] = -(Vs2_vi[i] * Vs1_vi[i]) + Vs3_vi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntWideningFormat {
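                    // Widening ops: results are 2*SEW wide (vw* types), so
                    // each destination micro-op consumes half of a source
                    // register; offset selects which half of the narrower
                    // source feeds this micro-op.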
|
|
0x30: vwaddu_vv({{
|
|
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
|
|
+ vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x31: vwadd_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
+ vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x32: vwsubu_vv({{
|
|
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
|
|
- vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x33: vwsub_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
- vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x34: vwaddu_wv({{
|
|
Vd_vwu[i] = Vs2_vwu[i] + vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x35: vwadd_wv({{
|
|
Vd_vwi[i] = Vs2_vwi[i] + vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x36: vwsubu_wv({{
|
|
Vd_vwu[i] = Vs2_vwu[i] - vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x37: vwsub_wv({{
|
|
Vd_vwi[i] = Vs2_vwi[i] - vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x38: vwmulu_vv({{
|
|
Vd_vwu[i] = vwu(Vs2_vu[i + offset])
|
|
* vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3a: vwmulsu_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
* vwu(Vs1_vu[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3b: vwmul_vv({{
|
|
Vd_vwi[i] = vwi(Vs2_vi[i + offset])
|
|
* vwi(Vs1_vi[i + offset]);
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3c: vwmaccu_vv({{
|
|
Vd_vwu[i] = vwu(Vs1_vu[i + offset])
|
|
* vwu(Vs2_vu[i + offset])
|
|
+ Vs3_vwu[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3d: vwmacc_vv({{
|
|
Vd_vwi[i] = vwi(Vs1_vi[i + offset])
|
|
* vwi(Vs2_vi[i + offset])
|
|
+ Vs3_vwi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
0x3f: vwmaccsu_vv({{
|
|
Vd_vwi[i] = vwi(Vs1_vi[i + offset])
|
|
* vwu(Vs2_vu[i + offset])
|
|
+ Vs3_vwi[i];
|
|
}}, OPMVV, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
// OPIVI
|
|
0x3: decode VFUNCT6 {
|
|
format VectorIntFormat {
|
|
0x00: vadd_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] + (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x03: vrsub_vi({{
|
|
Vd_vi[i] = (vi)sext<5>(SIMM5) - Vs2_vi[i];
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x09: vand_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] & (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x0a: vor_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] | (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x0b: vxor_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] ^ (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
0x0c: VectorGatherFormat::vrgather_vi({{
|
|
for (uint32_t i = 0; i < microVl; i++) {
|
|
uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
|
|
if (this->vm || elem_mask(v0, ei)) {
|
|
const uint64_t idx =
|
|
(uint64_t)sext<5>(SIMM5) - vs2_elems * vs2_idx;
|
|
Vd_vu[i] = ((uint64_t)sext<5>(SIMM5) >= vlmax) ? 0
|
|
: (idx < vs2_elems) ? Vs2_vu[idx]
|
|
: Vs3_vu[i];
|
|
}
|
|
}
|
|
}}, OPIVI, VectorMiscOp);
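                // vslideup/vslidedown move vs2 elements up/down by OFFSET
                // positions (vd[i+OFFSET] = vs2[i], resp. vd[i] =
                // vs2[i+OFFSET], with zeros beyond the end of the source
                // group); the code below converts the global offset into
                // per-micro-op register and element offsets.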
|
|
0x0e: VectorSlideUpFormat::vslideup_vi({{
|
|
const int offset = (int)(uint64_t)(SIMM5);
|
|
const int microVlmax = vtype_VLMAX(machInst.vtype8,
|
|
vlen, true);
|
|
const int vregOffset = vdIdx - vs2Idx;
|
|
const int offsetInVreg = offset - vregOffset * microVlmax;
|
|
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
|
|
const int upperBound = (offsetInVreg >= 0)
|
|
? microVlmax - offsetInVreg
|
|
: microVlmax + offsetInVreg;
|
|
const int vdOffset = (offsetInVreg >= 0)
|
|
? offsetInVreg
|
|
: 0;
|
|
const int vs2Offset = (offsetInVreg >= 0)
|
|
? 0
|
|
: -offsetInVreg;
|
|
const int elemOffset = vdOffset + vdIdx * microVlmax;
|
|
for (int i = 0;
|
|
i < upperBound && i + vdOffset < microVl;
|
|
i++) {
|
|
if (this->vm || elem_mask(v0, i + elemOffset)) {
|
|
Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
|
|
}
|
|
}
|
|
}
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x0f: VectorSlideDownFormat::vslidedown_vi({{
|
|
const int offset = (int)(uint64_t)(SIMM5);
|
|
const int microVlmax = vtype_VLMAX(machInst.vtype8,
|
|
vlen, true);
|
|
const int vregOffset = vs2Idx - vdIdx;
|
|
const int offsetInVreg = offset - vregOffset * microVlmax;
|
|
const int numVs2s = vtype_regs_per_group(vtype);
|
|
if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
|
|
const bool needZeroTail = numVs2s == vs2Idx + 1;
|
|
const int upperBound = (offsetInVreg >= 0)
|
|
? microVlmax - offsetInVreg
|
|
: microVlmax + offsetInVreg;
|
|
const int vdOffset = (offsetInVreg >= 0)
|
|
? 0
|
|
: -offsetInVreg;
|
|
const int vs2Offset = (offsetInVreg >= 0)
|
|
? offsetInVreg
|
|
: 0;
|
|
const int elemIdxBase = vdIdx * microVlmax;
|
|
vreg_t resVreg;
|
|
auto res = resVreg.as<vu>();
|
|
for (int i = 0;
|
|
i < upperBound && i + vdOffset < microVl;
|
|
i++) {
|
|
res[i + vdOffset] = Vs2_vu[i + vs2Offset];
|
|
}
|
|
if (needZeroTail) {
|
|
for (int i = upperBound + vdOffset;
|
|
i < microVlmax; i++) {
|
|
res[i] = 0;
|
|
}
|
|
}
|
|
for (int i = vdOffset; i < microVl ; i++) {
|
|
if (vm || elem_mask(v0, i + elemIdxBase)) {
|
|
Vd_vu[i] = res[i];
|
|
}
|
|
}
|
|
}
|
|
}}, OPIVI, VectorMiscOp);
|
|
format VectorIntFormat {
|
|
0x10: decode VM {
|
|
0x0: vadc_vim({{
|
|
Vd_vi[i] = Vs2_vi[i] +
|
|
(vi)sext<5>(SIMM5) + elem_mask(v0, ei);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
// the unmasked versions (vm=1) are reserved
|
|
}
|
|
0x17: decode VM {
|
|
0x0: vmerge_vim({{
|
|
Vd_vi[i] = elem_mask(v0, ei)
|
|
? (vi)sext<5>(SIMM5)
|
|
: Vs2_vi[i];
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1: vmv_v_i({{
|
|
Vd_vi[i] = (vi)sext<5>(SIMM5);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
format VectorIntVxsatFormat{
|
|
0x20: vsaddu_vi({{
|
|
Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], (vu)sext<5>(SIMM5),
|
|
vxsatptr);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x21: vsadd_vi({{
|
|
Vd_vi[i] = sat_add<vi>(Vs2_vi[i], (vi)sext<5>(SIMM5),
|
|
vxsatptr);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntFormat {
|
|
0x25: vsll_vi({{
|
|
Vd_vu[i] = Vs2_vu[i] << ((vu)SIMM5 & (sew - 1) & 0x1f);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x28: vsrl_vi({{
|
|
Vd_vu[i] = Vs2_vu[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2a: vssrl_vi({{
|
|
int sh = SIMM5 & (vtype_SEW(vtype) - 1);
|
|
__uint128_t res = Vs2_vu[i];
|
|
|
|
res = int_rounding<__uint128_t>(
|
|
res, 0 /* TODO */, sh) >> sh;
|
|
|
|
Vd_vu[i] = res;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x29: vsra_vi({{
|
|
Vd_vi[i] = Vs2_vi[i] >> ((vu)SIMM5 & (sew - 1) & 0x1f);
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2b: vssra_vi({{
|
|
int sh = SIMM5 & (sew - 1);
|
|
__int128_t val = Vs2_vi[i];
|
|
|
|
val = int_rounding<__int128_t>(val,
|
|
xc->readMiscReg(MISCREG_VXRM), sh);
|
|
Vd_vi[i] = val >> sh;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
// According to Spec Section 16.6,
|
|
// vm must be 1 (unmasked) in vmv<nr>r.v instructions.
|
|
0x27: decode VM { 0x1: decode SIMM3 {
|
|
format VMvWholeFormat {
|
|
0x0: vmv1r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x1: vmv2r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x3: vmv4r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
0x7: vmv8r_v({{
|
|
Vd_ud[i] = Vs2_ud[i];
|
|
}}, OPIVI, VectorMiscOp);
|
|
}
|
|
}}
|
|
format VectorIntMaskFormat {
|
|
0x11: decode VM {
|
|
0x0: vmadc_vim({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5),
|
|
elem_mask(v0, ei)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1: vmadc_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
carry_out(Vs2_vi[i], (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
0x18: vmseq_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] == (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x19: vmsne_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] != (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1c: vmsleu_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] <= (vu)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1d: vmsle_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] <= (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1e: vmsgtu_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vu[i] > (vu)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x1f: vmsgt_vi({{
|
|
Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
|
|
(Vs2_vi[i] > (vi)sext<5>(SIMM5)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
format VectorIntNarrowingFormat {
|
|
0x2c: vnsrl_wi({{
|
|
Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
|
|
((vwu)SIMM5 & (sew * 2 - 1)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2d: vnsra_wi({{
|
|
Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
|
|
((vwu)SIMM5 & (sew * 2 - 1)));
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2e: vnclipu_wi({{
|
|
vu max = std::numeric_limits<vu>::max();
|
|
uint64_t sign_mask =
|
|
std::numeric_limits<uint64_t>::max() << sew;
|
|
__uint128_t res = Vs2_vwu[i];
|
|
unsigned shift = VS1 & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__uint128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res & sign_mask) {
|
|
// TODO: vxsat
|
|
res = max;
|
|
}
|
|
|
|
Vd_vu[i + offset] = (vu)res;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
0x2f: vnclip_wi({{
|
|
vi max = std::numeric_limits<vi>::max();
|
|
vi min = std::numeric_limits<vi>::min();
|
|
__int128_t res = Vs2_vwi[i];
|
|
unsigned shift = VS1 & ((sew * 2) - 1);
|
|
|
|
res = int_rounding<__int128_t>(
|
|
res, 0 /* TODO */, shift) >> shift;
|
|
|
|
if (res < min) {
|
|
res = min;
|
|
// TODO: vxsat
|
|
} else if (res > max) {
|
|
res = max;
|
|
// TODO: vxsat
|
|
}
|
|
|
|
Vd_vi[i + offset] = (vi)res;
|
|
}}, OPIVI, VectorIntegerArithOp);
|
|
}
|
|
}
|
|
// OPIVX
|
|
0x4: decode VFUNCT6 {
|
|
format VectorIntFormat {
|
|
0x0: vadd_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] + Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x2: vsub_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] - Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x3: vrsub_vx({{
|
|
Vd_vu[i] = Rs1_vu - Vs2_vu[i];
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x4: vminu_vx({{
|
|
Vd_vu[i] = std::min(Vs2_vu[i], Rs1_vu);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x5: vmin_vx({{
|
|
Vd_vi[i] = std::min(Vs2_vi[i], Rs1_vi);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x6: vmaxu_vx({{
|
|
Vd_vu[i] = std::max(Vs2_vu[i], Rs1_vu);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x7: vmax_vx({{
|
|
Vd_vi[i] = std::max(Vs2_vi[i], Rs1_vi);
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0x9: vand_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] & Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0xa: vor_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] | Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
0xb: vxor_vx({{
|
|
Vd_vu[i] = Vs2_vu[i] ^ Rs1_vu;
|
|
}}, OPIVX, VectorIntegerArithOp);
|
|
}
|
|
                0x0e: VectorSlideUpFormat::vslideup_vx({{
                    const int offset = (int)Rs1_vu;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vdIdx - vs2Idx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int elemOffset = vdOffset + vdIdx * microVlmax;
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            if (this->vm || elem_mask(v0, i + elemOffset)) {
                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
                            }
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                0x0f: VectorSlideDownFormat::vslidedown_vx({{
                    const int offset = (int)Rs1_vu;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vs2Idx - vdIdx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    const int numVs2s = vtype_regs_per_group(vtype);
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const bool needZeroTail = numVs2s == vs2Idx + 1;
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int elemIdxBase = vdIdx * microVlmax;
                        vreg_t resVreg;
                        auto res = resVreg.as<vu>();
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
                        }
                        if (needZeroTail) {
                            for (int i = upperBound + vdOffset;
                                i < microVlmax; i++) {
                                res[i] = 0;
                            }
                        }
                        for (int i = vdOffset; i < microVl; i++) {
                            if (vm || elem_mask(v0, i + elemIdxBase)) {
                                Vd_vu[i] = res[i];
                            }
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                0x0c: VectorGatherFormat::vrgather_vx({{
                    for (uint32_t i = 0; i < microVl; i++) {
                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
                        if (this->vm || elem_mask(v0, ei)) {
                            const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx;
                            Vd_vu[i] = (Rs1_vu >= vlmax) ? 0
                                : (idx < vs2_elems) ? Vs2_vu[idx]
                                : Vs3_vu[i];
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                format VectorIntFormat {
                    0x10: decode VM {
                        0x0: vadc_vxm({{
                            Vd_vi[i] = Vs2_vi[i] + Rs1_vi + elem_mask(v0, ei);
                        }}, OPIVX, VectorIntegerArithOp);
                        // the unmasked versions (vm=1) are reserved
                    }
                    0x12: decode VM {
                        0x0: vsbc_vxm({{
                            Vd_vi[i] = Vs2_vi[i] - Rs1_vi - elem_mask(v0, ei);
                        }}, OPIVX, VectorIntegerArithOp);
                        // the unmasked versions (vm=1) are reserved
                    }
                    0x17: decode VM {
                        0x0: vmerge_vxm({{
                            Vd_vu[i] = elem_mask(v0, ei) ? Rs1_vu : Vs2_vu[i];
                        }}, OPIVX, VectorIntegerArithOp);
                        0x1: decode VS2 {
                            0x0: vmv_v_x({{
                                Vd_vu[i] = Rs1_vu;
                            }}, OPIVX, VectorIntegerArithOp);
                        }
                    }
                }
                format VectorIntVxsatFormat {
                    0x20: vsaddu_vx({{
                        Vd_vu[i] = sat_addu<vu>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x21: vsadd_vx({{
                        Vd_vu[i] = sat_add<vi>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x22: vssubu_vx({{
                        Vd_vu[i] = sat_subu<vu>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x23: vssub_vx({{
                        Vd_vu[i] = sat_sub<vi>(Vs2_vu[i], Rs1_vu,
                            vxsatptr);
                    }}, OPIVX, VectorIntegerArithOp);
                    0x27: vsmul_vx({{
                        vi max = std::numeric_limits<vi>::max();
                        vi min = std::numeric_limits<vi>::min();
                        bool overflow = Rs1_vi == Vs2_vi[i] && Rs1_vi == min;
                        __int128_t result =
                            (__int128_t)Rs1_vi * (__int128_t)Vs2_vi[i];
                        result = int_rounding<__uint128_t>(
                            result, 0 /* TODO */, sew - 1);
                        result = result >> (sew - 1);
                        if (overflow) {
                            result = max;
                            *vxsatptr = true;
                        }

                        Vd_vi[i] = (vi)result;
                    }}, OPIVX, VectorIntegerArithOp);
                }
                format VectorIntFormat {
                    0x25: vsll_vx({{
                        Vd_vu[i] = Vs2_vu[i] << (Rs1_vu & (sew - 1));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x28: vsrl_vx({{
                        Vd_vu[i] = Vs2_vu[i] >> (Rs1_vu & (sew - 1));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x29: vsra_vx({{
                        Vd_vi[i] = Vs2_vi[i] >> (Rs1_vu & (sew - 1));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2a: vssrl_vx({{
                        int sh = Rs1_vu & (sew - 1);
                        __uint128_t val = Vs2_vu[i];

                        val = int_rounding<__uint128_t>(val,
                            xc->readMiscReg(MISCREG_VXRM), sh);
                        Vd_vu[i] = val >> sh;
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2b: vssra_vx({{
                        int sh = Rs1_vu & (sew - 1);
                        __int128_t val = Vs2_vi[i];

                        val = int_rounding<__int128_t>(val,
                            xc->readMiscReg(MISCREG_VXRM), sh);
                        Vd_vi[i] = val >> sh;
                    }}, OPIVX, VectorIntegerArithOp);
                }
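                // Narrowing ops read 2*SEW-wide elements from vs2 and write
                // SEW-wide results; the shift amount is taken modulo 2*SEW.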
                format VectorIntNarrowingFormat {
                    0x2c: vnsrl_wx({{
                        Vd_vu[i + offset] = (vu)(Vs2_vwu[i] >>
                            ((vwu)Rs1_vu & (sew * 2 - 1)));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2d: vnsra_wx({{
                        Vd_vi[i + offset] = (vi)(Vs2_vwi[i] >>
                            ((vwu)Rs1_vu & (sew * 2 - 1)));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2e: vnclipu_wx({{
                        vu max = std::numeric_limits<vu>::max();
                        uint64_t sign_mask =
                            std::numeric_limits<uint64_t>::max() << sew;
                        __uint128_t res = Vs2_vwu[i];
                        unsigned shift = Rs1_vu & ((sew * 2) - 1);

                        res = int_rounding<__uint128_t>(
                            res, 0 /* TODO */, shift) >> shift;

                        if (res & sign_mask) {
                            // TODO: vxsat
                            res = max;
                        }

                        Vd_vu[i + offset] = (vu)res;
                    }}, OPIVX, VectorIntegerArithOp);
                    0x2f: vnclip_wx({{
                        vi max = std::numeric_limits<vi>::max();
                        vi min = std::numeric_limits<vi>::min();
                        __int128_t res = Vs2_vwi[i];
                        unsigned shift = Rs1_vi & ((sew * 2) - 1);

                        res = int_rounding<__int128_t>(
                            res, 0 /* TODO */, shift) >> shift;

                        if (res < min) {
                            res = min;
                            // TODO: vxsat
                        } else if (res > max) {
                            res = max;
                            // TODO: vxsat
                        }

                        Vd_vi[i + offset] = (vi)res;
                    }}, OPIVX, VectorIntegerArithOp);
                }

                format VectorIntMaskFormat {
                    0x11: decode VM {
                        0x0: vmadc_vxm({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                carry_out(Vs2_vi[i], Rs1_vi,
                                    elem_mask(v0, ei)));
                        }}, OPIVX, VectorIntegerArithOp);
                        0x1: vmadc_vx({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                carry_out(Vs2_vi[i], Rs1_vi));
                        }}, OPIVX, VectorIntegerArithOp);
                    }
                    0x13: decode VM {
                        0x0: vmsbc_vxm({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                borrow_out(Vs2_vi[i], Rs1_vi,
                                    elem_mask(v0, ei)));
                        }}, OPIVX, VectorIntegerArithOp);
                        0x1: vmsbc_vx({{
                            Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                                borrow_out(Vs2_vi[i], Rs1_vi));
                        }}, OPIVX, VectorIntegerArithOp);
                    }
                    0x18: vmseq_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] == Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x19: vmsne_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] != Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1a: vmsltu_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] < Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1b: vmslt_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vi[i] < Rs1_vi));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1c: vmsleu_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] <= Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1d: vmsle_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vi[i] <= Rs1_vi));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1e: vmsgtu_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vu[i] > Rs1_vu));
                    }}, OPIVX, VectorIntegerArithOp);
                    0x1f: vmsgt_vx({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            (Vs2_vi[i] > Rs1_vi));
                    }}, OPIVX, VectorIntegerArithOp);
                }
            }
            // OPFVF
            0x5: decode VFUNCT6 {
                format VectorFloatFormat {
                    0x00: vfadd_vf({{
                        auto fd = fadd<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x02: vfsub_vf({{
                        auto fd = fsub<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x04: vfmin_vf({{
                        auto fd = fmin<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x06: vfmax_vf({{
                        auto fd = fmax<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x08: vfsgnj_vf({{
                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            false, false).v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x09: vfsgnjn_vf({{
                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            true, false).v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x0a: vfsgnjx_vf({{
                        Vd_vu[i] = fsgnj<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            false, true).v;
                    }}, OPFVF, VectorFloatArithOp);
                }
                0x0e: VectorFloatSlideUpFormat::vfslide1up_vf({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vdIdx - vs2Idx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int elemOffset = vdOffset + vdIdx * microVlmax;
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            if (this->vm || elem_mask(v0, i + elemOffset)) {
                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
                            }
                        }
                        // TODO: dirty code
                        if (vdIdx == 0 && vs2Idx == 0 &&
                            (this->vm || elem_mask(v0, 0))) {
                            tmp_d0.as<vu>()[0] = Rs1_vu;
                        }
                    }
                }}, OPFVF, VectorMiscOp);
                0x0f: VectorFloatSlideDownFormat::vfslide1down_vf({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vs2Idx - vdIdx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    const int numVs2s = vtype_regs_per_group(vtype);
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const bool needZeroTail = numVs2s == vs2Idx + 1;
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int elemIdxBase = vdIdx * microVlmax;
                        vreg_t resVreg;
                        auto res = resVreg.as<vu>();
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
                        }
                        if (needZeroTail) {
                            for (int i = upperBound + vdOffset;
                                i < microVlmax; i++) {
                                res[i] = 0;
                            }
                        }
                        for (int i = vdOffset; i < microVl; i++) {
                            if (vm || elem_mask(v0, i + elemIdxBase)) {
                                Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1)
                                    ? res[i]
                                    : Rs1_vu;
                            }
                        }
                    }
                }}, OPFVF, VectorMiscOp);
                // VRFUNARY0
                0x10: decode VS2 {
                    0x00: decode VM {
                        // The encodings corresponding to the masked versions
                        // (vm=0) of vfmv.s.f are reserved
                        0x1: VectorNonSplitFormat::vfmv_s_f({{
                            if (this->vl) {
                                auto fd = ftype_freg<et>(freg(Fs1_bits));
                                Vd_vu[0] = fd.v;
                            }
                        }}, OPFVV, VectorMiscOp);
                    }
                }
                format VectorFloatFormat {
                    0x17: decode VM {
                        0x0: vfmerge_vfm({{
                            Vd_vu[i] = elem_mask(v0, ei)
                                ? ftype_freg<et>(freg(Fs1_bits)).v
                                : Vs2_vu[i];
                        }}, OPFVF, VectorFloatArithOp);
                        0x1: vfmv_v_f({{
                            auto fd = ftype_freg<et>(freg(Fs1_bits));
                            Vd_vu[i] = fd.v;
                        }}, OPFVF, VectorFloatArithOp);
                    }
                }
                format VectorFloatMaskFormat {
                    0x18: vmfeq_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            feq<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x19: vmfle_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            fle<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1b: vmflt_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            flt<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1c: vmfne_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            !feq<et>(ftype<et>(Vs2_vu[i]),
                                ftype_freg<et>(freg(Fs1_bits))));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1d: vmfgt_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            flt<et>(ftype_freg<et>(freg(Fs1_bits)),
                                ftype<et>(Vs2_vu[i])));
                    }}, OPFVF, VectorFloatArithOp);
                    0x1f: vmfge_vf({{
                        Vd_ub[(i + offset)/8] = ASSIGN_VD_BIT(i + offset,
                            fle<et>(ftype_freg<et>(freg(Fs1_bits)),
                                ftype<et>(Vs2_vu[i])));
                    }}, OPFVF, VectorFloatArithOp);
                }
                format VectorFloatFormat {
                    0x20: vfdiv_vf({{
                        auto fd = fdiv<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x21: vfrdiv_vf({{
                        auto fd = fdiv<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x24: vfmul_vf({{
                        auto fd = fmul<et>(ftype<et>(Vs2_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x27: vfrsub_vf({{
                        auto fd = fsub<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x28: vfmadd_vf({{
                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x29: vfnmadd_vf({{
                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
                            ftype_freg<et>(freg(Fs1_bits)),
                            fneg(ftype<et>(Vs2_vu[i])));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2a: vfmsub_vf({{
                        auto fd = fmadd<et>(ftype<et>(Vs3_vu[i]),
                            ftype_freg<et>(freg(Fs1_bits)),
                            fneg(ftype<et>(Vs2_vu[i])));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2b: vfnmsub_vf({{
                        auto fd = fmadd<et>(fneg(ftype<et>(Vs3_vu[i])),
                            ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2c: vfmacc_vf({{
                        auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]),
                            ftype<et>(Vs3_vu[i]));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2d: vfnmacc_vf({{
                        auto fd = fmadd<et>(
                            fneg(ftype_freg<et>(freg(Fs1_bits))),
                            ftype<et>(Vs2_vu[i]),
                            fneg(ftype<et>(Vs3_vu[i]))
                        );
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2e: vfmsac_vf({{
                        auto fd = fmadd<et>(ftype_freg<et>(freg(Fs1_bits)),
                            ftype<et>(Vs2_vu[i]),
                            fneg(ftype<et>(Vs3_vu[i])));
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x2f: vfnmsac_vf({{
                        auto fd = fmadd<et>(
                            fneg(ftype_freg<et>(freg(Fs1_bits))),
                            ftype<et>(Vs2_vu[i]),
                            ftype<et>(Vs3_vu[i])
                        );
                        Vd_vu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                }
                format VectorFloatWideningFormat {
                    0x30: vfwadd_vf({{
                        auto fd = fadd<ewt>(
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x32: vfwsub_vf({{
                        auto fd = fsub<ewt>(
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x34: vfwadd_wf({{
                        auto fd = fadd<ewt>(
                            ftype<ewt>(Vs2_vwu[i]),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x36: vfwsub_wf({{
                        auto fd = fsub<ewt>(
                            ftype<ewt>(Vs2_vwu[i]),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x38: vfwmul_vf({{
                        auto fd = fmul<ewt>(
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fwiden(ftype_freg<et>(freg(Fs1_bits))));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3c: vfwmacc_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(ftype_freg<et>(freg(Fs1_bits))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            ftype<ewt>(Vs3_vwu[i]));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3d: vfwnmacc_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fneg(ftype<ewt>(Vs3_vwu[i])));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3e: vfwmsac_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(ftype_freg<et>(freg(Fs1_bits))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            fneg(ftype<ewt>(Vs3_vwu[i])));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                    0x3f: vfwnmsac_vf({{
                        auto fd = fmadd<ewt>(
                            fwiden(fneg(ftype_freg<et>(freg(Fs1_bits)))),
                            fwiden(ftype<et>(Vs2_vu[i + offset])),
                            ftype<ewt>(Vs3_vwu[i]));
                        Vd_vwu[i] = fd.v;
                    }}, OPFVF, VectorFloatArithOp);
                }
            }
            // OPMVX
            0x6: decode VFUNCT6 {
                format VectorIntFormat {
                    0x08: vaaddu_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vu[i] + Rs1_vu;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vu[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x09: vaadd_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vi[i] + Rs1_vi;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vi[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                }
                0x0e: VectorSlideUpFormat::vslide1up_vx({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vdIdx - vs2Idx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int elemOffset = vdOffset + vdIdx * microVlmax;
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            if (this->vm || elem_mask(v0, i + elemOffset)) {
                                Vd_vu[i + vdOffset] = Vs2_vu[i + vs2Offset];
                            }
                        }
                        // TODO: dirty code
                        if (vdIdx == 0 && vs2Idx == 0 &&
                            (this->vm || elem_mask(v0, 0))) {
                            tmp_d0.as<vu>()[0] = Rs1_vu;
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                0x0f: VectorSlideDownFormat::vslide1down_vx({{
                    const int offset = 1;
                    const int microVlmax = vtype_VLMAX(machInst.vtype8,
                        vlen, true);
                    const int vregOffset = vs2Idx - vdIdx;
                    const int offsetInVreg = offset - vregOffset * microVlmax;
                    const int numVs2s = vtype_regs_per_group(vtype);
                    if (std::abs(offsetInVreg) < uint32_t(microVlmax)) {
                        const bool needZeroTail = numVs2s == vs2Idx + 1;
                        const int upperBound = (offsetInVreg >= 0)
                            ? microVlmax - offsetInVreg
                            : microVlmax + offsetInVreg;
                        const int vdOffset = (offsetInVreg >= 0)
                            ? 0
                            : -offsetInVreg;
                        const int vs2Offset = (offsetInVreg >= 0)
                            ? offsetInVreg
                            : 0;
                        const int elemIdxBase = vdIdx * microVlmax;
                        vreg_t resVreg;
                        auto res = resVreg.as<vu>();
                        for (int i = 0;
                            i < upperBound && i + vdOffset < microVl;
                            i++) {
                            res[i + vdOffset] = Vs2_vu[i + vs2Offset];
                        }
                        if (needZeroTail) {
                            for (int i = upperBound + vdOffset;
                                i < microVlmax; i++) {
                                res[i] = 0;
                            }
                        }
                        for (int i = vdOffset; i < microVl; i++) {
                            if (vm || elem_mask(v0, i + elemIdxBase)) {
                                Vd_vu[i] = (i + elemIdxBase != machInst.vl - 1)
                                    ? res[i]
                                    : Rs1_vu;
                            }
                        }
                    }
                }}, OPIVX, VectorMiscOp);
                // VRXUNARY0
                0x10: decode VS2 {
                    0x00: decode VM {
                        // The encodings corresponding to the masked versions
                        // (vm=0) of vmv.s.x are reserved.
                        0x1: VectorNonSplitFormat::vmv_s_x({{
                            if (this->vl) {
                                Vd_vu[0] = Rs1_vu;
                            }
                        }}, OPMVX, VectorMiscOp);
                    }
                }
                format VectorIntFormat {
                    0x0a: vasubu_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vu[i] - Rs1_vu;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vu[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x0b: vasub_vx({{
                        __uint128_t res = (__uint128_t)Vs2_vi[i] - Rs1_vi;
                        res = int_rounding<__uint128_t>(res, 0 /* TODO */, 1);
                        Vd_vi[i] = res >> 1;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x20: vdivu_vx({{
                        Vd_vu[i] = divu<vu>(Vs2_vu[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x21: vdiv_vx({{
                        Vd_vi[i] = div<vi>(Vs2_vi[i], Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x22: vremu_vx({{
                        Vd_vu[i] = remu<vu>(Vs2_vu[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x23: vrem_vx({{
                        Vd_vi[i] = rem<vi>(Vs2_vi[i], Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x24: vmulhu_vx({{
                        Vd_vu[i] = mulhu<vu>(Vs2_vu[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x25: vmul_vx({{
                        Vd_vi[i] = Vs2_vi[i] * Rs1_vi;
                    }}, OPMVX, VectorIntegerArithOp);
                    0x26: vmulhsu_vx({{
                        Vd_vi[i] = mulhsu<vi>(Vs2_vi[i], Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x27: vmulh_vx({{
                        Vd_vi[i] = mulh<vi>(Vs2_vi[i], Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x29: vmadd_vx({{
                        Vd_vi[i] = Vs3_vi[i] * Rs1_vi + Vs2_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x2b: vnmsub_vx({{
                        Vd_vi[i] = -(Vs3_vi[i] * Rs1_vi) + Vs2_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x2d: vmacc_vx({{
                        Vd_vi[i] = Vs2_vi[i] * Rs1_vi + Vs3_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x2f: vnmsac_vx({{
                        Vd_vi[i] = -(Vs2_vi[i] * Rs1_vi) + Vs3_vi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                }
                format VectorIntWideningFormat {
                    0x30: vwaddu_vx({{
                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) + vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x31: vwadd_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) + vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x32: vwsubu_vx({{
                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) - vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x33: vwsub_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) - vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x34: vwaddu_wx({{
                        Vd_vwu[i] = Vs2_vwu[i] + vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x35: vwadd_wx({{
                        Vd_vwi[i] = Vs2_vwi[i] + vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x36: vwsubu_wx({{
                        Vd_vwu[i] = Vs2_vwu[i] - vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x37: vwsub_wx({{
                        Vd_vwi[i] = Vs2_vwi[i] - vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x38: vwmulu_vx({{
                        Vd_vwu[i] = vwu(Vs2_vu[i + offset]) * vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3a: vwmulsu_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwu(Rs1_vu);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3b: vwmul_vx({{
                        Vd_vwi[i] = vwi(Vs2_vi[i + offset]) * vwi(Rs1_vi);
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3c: vwmaccu_vx({{
                        Vd_vwu[i] = vwu(Rs1_vu) * vwu(Vs2_vu[i + offset])
                            + Vs3_vwu[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3d: vwmacc_vx({{
                        Vd_vwi[i] = vwi(Rs1_vi) * vwi(Vs2_vi[i + offset])
                            + Vs3_vwi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3e: vwmaccus_vx({{
                        Vd_vwi[i] = vwu(Rs1_vu) * vwi(Vs2_vi[i + offset])
                            + Vs3_vwi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                    0x3f: vwmaccsu_vx({{
                        Vd_vwi[i] = vwi(Rs1_vi) * vwu(Vs2_vu[i + offset])
                            + Vs3_vwi[i];
                    }}, OPMVX, VectorIntegerArithOp);
                }
            }
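            // Configuration-setting instructions. The first code block
            // gathers the requested vl/vtype operands; the second commits
            // the new vl to rd and to the VL/VTYPE CSRs.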
            0x7: decode BIT31 {
                format VConfOp {
                    0x0: vsetvli({{
                        uint64_t rd_bits = RD;
                        uint64_t rs1_bits = RS1;
                        uint64_t requested_vl = Rs1_ud;
                        uint64_t requested_vtype = zimm11;
                        uint32_t vlen = VlenbBits * 8;
                        uint32_t vlmax = getVlmax(Vtype, vlen);
                        uint32_t current_vl = VL;
                    }}, {{
                        Rd_ud = new_vl;
                        VL = new_vl;
                        Vtype = new_vtype;
                    }}, VSetVlDeclare, VSetVliBranchTarget
                    , VectorConfigOp, IsUncondControl
                    , IsIndirectControl);
                    0x1: decode BIT30 {
                        0x0: vsetvl({{
                            uint64_t rd_bits = RD;
                            uint64_t rs1_bits = RS1;
                            uint64_t requested_vl = Rs1_ud;
                            uint64_t requested_vtype = Rs2_ud;
                            uint32_t vlen = VlenbBits * 8;
                            uint32_t vlmax = getVlmax(Vtype, vlen);
                            uint32_t current_vl = VL;
                        }}, {{
                            Rd_ud = new_vl;
                            VL = new_vl;
                            Vtype = new_vtype;
                        }}, VSetVlDeclare, VSetVlBranchTarget
                        , VectorConfigOp, IsUncondControl
                        , IsIndirectControl);
                        0x1: vsetivli({{
                            uint64_t rd_bits = RD;
                            uint64_t rs1_bits = -1;
                            uint64_t requested_vl = uimm;
                            uint64_t requested_vtype = zimm10;
                            uint32_t vlen = VlenbBits * 8;
                            uint32_t vlmax = getVlmax(Vtype, vlen);
                            uint32_t current_vl = VL;
                        }}, {{
                            Rd_ud = new_vl;
                            VL = new_vl;
                            Vtype = new_vtype;
                        }}, VSetiVliDeclare, VSetiVliBranchTarget
                        , VectorConfigOp, IsUncondControl
                        , IsDirectControl);
                    }
                }
            }
        }

        0x18: decode FUNCT3 {
            format BOp {
                0x0: beq({{
                    if (rvSext(Rs1) == rvSext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x1: bne({{
                    if (rvSext(Rs1) != rvSext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x4: blt({{
                    if (rvSext(Rs1_sd) < rvSext(Rs2_sd)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x5: bge({{
                    if (rvSext(Rs1_sd) >= rvSext(Rs2_sd)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x6: bltu({{
                    if (rvZext(Rs1) < rvZext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
                0x7: bgeu({{
                    if (rvZext(Rs1) >= rvZext(Rs2)) {
                        NPC = rvZext(PC + imm);
                    } else {
                        NPC = rvZext(NPC);
                    }
                }}, IsDirectControl, IsCondControl);
            }
        }

        0x19: decode FUNCT3 {
            0x0: Jump::jalr({{
                Rd = rvSext(NPC);
                NPC = rvZext((imm + Rs1) & (~0x1));
            }}, IsIndirectControl, IsUncondControl);
        }

        0x1b: JOp::jal({{
            Rd = rvSext(NPC);
            NPC = rvZext(PC + imm);
        }}, IsDirectControl, IsUncondControl);

        0x1c: decode FUNCT3 {
            format SystemOp {
                0x0: decode FUNCT7 {
                    0x0: decode RS2 {
                        0x0: ecall({{
                            return std::make_shared<SyscallFault>(
                                (PrivilegeMode)xc->readMiscReg(MISCREG_PRV));
                        }}, IsSerializeAfter, IsNonSpeculative, IsSyscall,
                            No_OpClass);
                        0x1: ebreak({{
                            return std::make_shared<BreakpointFault>(
                                xc->pcState());
                        }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                        0x2: uret({{
                            MISA misa = xc->readMiscReg(MISCREG_ISA);
                            if (!misa.rvn) {
                                return std::make_shared<IllegalInstFault>(
                                    "uret can't execute without RVN",
                                    machInst);
                            }
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            status.uie = status.upie;
                            status.upie = 1;
                            xc->setMiscReg(MISCREG_STATUS, status);
                            NPC = xc->readMiscReg(MISCREG_UEPC);
                        }}, IsSerializeAfter, IsNonSpeculative, IsReturn);
                    }
                    0x8: decode RS2 {
                        0x2: sret({{
                            MISA misa = xc->readMiscReg(MISCREG_ISA);
                            if (!misa.rvs) {
                                return std::make_shared<IllegalInstFault>(
                                    "sret can't execute without RVS",
                                    machInst);
                            }
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            auto pm = (PrivilegeMode)xc->readMiscReg(
                                MISCREG_PRV);
                            if (pm == PRV_U ||
                                (pm == PRV_S && status.tsr == 1)) {
                                return std::make_shared<IllegalInstFault>(
                                    "sret in user mode or TSR enabled",
                                    machInst);
                                NPC = NPC;
                            } else {
                                xc->setMiscReg(MISCREG_PRV, status.spp);
                                status.sie = status.spie;
                                status.spie = 1;
                                status.spp = PRV_U;
                                xc->setMiscReg(MISCREG_STATUS, status);
                                NPC = xc->readMiscReg(MISCREG_SEPC);
                            }
                        }}, IsSerializeAfter, IsNonSpeculative, IsReturn);
                        0x5: wfi({{
                            MISA misa = xc->readMiscReg(MISCREG_ISA);
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            auto pm = (PrivilegeMode)xc->readMiscReg(
                                MISCREG_PRV);
                            if (misa.rvs && (pm == PRV_U ||
                                (pm == PRV_S && status.tw == 1))) {
                                return std::make_shared<IllegalInstFault>(
                                    "wfi in user mode or TW enabled",
                                    machInst);
                            }
                            // Go to sleep only if there's no pending interrupt
                            // at all, including masked interrupts.
                            auto tc = xc->tcBase();
                            auto cpu = tc->getCpuPtr();
                            auto ic = dynamic_cast<RiscvISA::Interrupts*>(
                                cpu->getInterruptController(tc->threadId()));
                            panic_if(!ic, "Invalid Interrupt Controller.");
                            if (ic->readIP() == 0
                                && xc->readMiscReg(MISCREG_NMIP) == 0) {
                                tc->quiesce();
                            }
                        }}, IsNonSpeculative, IsQuiesce,
                            IsSerializeAfter, No_OpClass, IsSquashAfter);
                    }
                    0x9: sfence_vma({{
                        MISA misa = xc->readMiscReg(MISCREG_ISA);
                        if (!misa.rvs) {
                            return std::make_shared<IllegalInstFault>(
                                "sfence_vma can't execute without RVS",
                                machInst);
                        }
                        STATUS status = xc->readMiscReg(MISCREG_STATUS);
                        auto pm = (PrivilegeMode)xc->readMiscReg(MISCREG_PRV);
                        if (pm == PRV_U || (pm == PRV_S && status.tvm == 1)) {
                            return std::make_shared<IllegalInstFault>(
                                "sfence in user mode or TVM enabled",
                                machInst);
                        }
                        xc->tcBase()->getMMUPtr()->demapPage(Rs1, Rs2);
                    }}, IsNonSpeculative, IsSerializeAfter, No_OpClass);
                    0x18: mret({{
                        if (xc->readMiscReg(MISCREG_PRV) != PRV_M) {
                            return std::make_shared<IllegalInstFault>(
                                "mret at lower privilege", machInst);
                            NPC = NPC;
                        } else {
                            STATUS status = xc->readMiscReg(MISCREG_STATUS);
                            xc->setMiscReg(MISCREG_PRV, status.mpp);
                            xc->setMiscReg(MISCREG_NMIE, 1);
                            status.mie = status.mpie;
                            status.mpie = 1;
                            status.mpp = PRV_U;
                            xc->setMiscReg(MISCREG_STATUS, status);
                            NPC = xc->readMiscReg(MISCREG_MEPC);
                        }
                    }}, IsSerializeAfter, IsNonSpeculative, IsReturn);
                }
            }
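            // CSR read/modify/write: rd receives the old (sign-extended)
            // CSR value while the CSR is updated with the new value.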
            format CSROp {
                0x1: csrrw({{
                    Rd = rvSext(data);
                    data = rvZext(Rs1);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x2: csrrs({{
                    Rd = rvSext(data);
                    data = rvZext(data | Rs1);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x3: csrrc({{
                    Rd = rvSext(data);
                    data = rvZext(data & ~Rs1);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x5: csrrwi({{
                    Rd = rvSext(data);
                    data = rvZext(uimm);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x6: csrrsi({{
                    Rd = rvSext(data);
                    data = rvZext(data | uimm);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
                0x7: csrrci({{
                    Rd = rvSext(data);
                    data = rvZext(data & ~uimm);
                }}, IsSerializeAfter, IsNonSpeculative, No_OpClass);
            }
        }

        0x1e: M5Op::M5Op();
    }
}