From adb177dab68c954d3bf91d1eda3e5dfac740eb13 Mon Sep 17 00:00:00 2001 From: Chong-Teng Wang <0909kfcmailo@gmail.com> Date: Fri, 17 May 2024 01:12:35 +0800 Subject: [PATCH] arch-riscv: Fix vrgather instruction (#1134) This commit fixes the implementation of vrgather instruction based on rvv 1.0. In section 16.4. Vector Register Gather Instructions, > Vector-scalar and vector-immediate forms of the register gather are also provided. These read one element from the source vector at the given index, and write this value to the active elements of the destination vector register. The index value in the scalar register and the immediate, zero-extended to XLEN bits, are treated as unsigned integers. If XLEN > SEW, the index value is not truncated to SEW bits. The fix zero-extends the index value in the scalar register and the immediate. --- src/arch/riscv/isa/decoder.isa | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/arch/riscv/isa/decoder.isa b/src/arch/riscv/isa/decoder.isa index 252aba256c..16907ad7f0 100644 --- a/src/arch/riscv/isa/decoder.isa +++ b/src/arch/riscv/isa/decoder.isa @@ -3751,10 +3751,10 @@ decode QUADRANT default Unknown::unknown() { 0x0c: VectorGatherFormat::vrgather_vi({{ for (uint32_t i = 0; i < microVl; i++) { uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + uint64_t zextImm = rvZext(SIMM5); if (this->vm || elem_mask(v0, ei)) { - const uint64_t idx = - (uint64_t)sext<5>(SIMM5) - vs2_elems * vs2_idx; - Vd_vu[i] = ((uint64_t)sext<5>(SIMM5) >= vlmax) ? 0 + const uint64_t idx = zextImm - vs2_elems * vs2_idx; + Vd_vu[i] = (zextImm >= vlmax) ? 0 : (idx < vs2_elems) ? Vs2_vu[idx] : Vs3_vu[i]; } @@ -4086,9 +4086,10 @@ decode QUADRANT default Unknown::unknown() { 0x0c: VectorGatherFormat::vrgather_vx({{ for (uint32_t i = 0; i < microVl; i++) { uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias; + uint64_t zextRs1 = rvZext(Rs1); if (this->vm || elem_mask(v0, ei)) { - const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx; - Vd_vu[i] = (Rs1_vu >= vlmax) ? 0 + const uint64_t idx = zextRs1 - vs2_elems * vs2_idx; + Vd_vu[i] = (zextRs1 >= vlmax) ? 0 : (idx < vs2_elems) ? Vs2_vu[idx] : Vs3_vu[i]; }