arch-riscv: Fix vrgather instruction (#1134)

This commit fixes the implementation of the vrgather instruction to match the RVV 1.0 specification. Section 16.4, "Vector Register Gather Instructions", states: "Vector-scalar and vector-immediate forms of the register gather are also provided. These read one element from the source vector at the given index, and write this value to the active elements of the destination vector register. The index value in the scalar register and the immediate, zero-extended to XLEN bits, are treated as unsigned integers. If XLEN > SEW, the index value is not truncated to SEW bits." The fix zero-extends the index value in the scalar register and the immediate.
2024-05-17 01:12:35 +08:00
parent 97a87a7c84
commit adb177dab6
1 changed files with 6 additions and 5 deletions
--- a/src/arch/riscv/isa/decoder.isa
+++ b/src/arch/riscv/isa/decoder.isa
@@ -3751,10 +3751,10 @@ decode QUADRANT default Unknown::unknown() {
                0x0c: VectorGatherFormat::vrgather_vi({{
                    for (uint32_t i = 0; i < microVl; i++) {
                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
+                        uint64_t zextImm = rvZext(SIMM5);
                        if (this->vm || elem_mask(v0, ei)) {
-                            const uint64_t idx =
-                                (uint64_t)sext<5>(SIMM5) - vs2_elems * vs2_idx;
-                            Vd_vu[i] = ((uint64_t)sext<5>(SIMM5) >= vlmax) ? 0
+                            const uint64_t idx = zextImm - vs2_elems * vs2_idx;
+                            Vd_vu[i] = (zextImm >= vlmax) ? 0
                                : (idx < vs2_elems) ? Vs2_vu[idx]
                                : Vs3_vu[i];
                        }
@@ -4086,9 +4086,10 @@ decode QUADRANT default Unknown::unknown() {
                0x0c: VectorGatherFormat::vrgather_vx({{
                    for (uint32_t i = 0; i < microVl; i++) {
                        uint32_t ei = i + vs1_idx * vs1_elems + vs1_bias;
+                        uint64_t zextRs1 = rvZext(Rs1);
                        if (this->vm || elem_mask(v0, ei)) {
-                            const uint64_t idx = Rs1_vu - vs2_elems * vs2_idx;
-                            Vd_vu[i] = (Rs1_vu >= vlmax) ? 0
+                            const uint64_t idx = zextRs1 - vs2_elems * vs2_idx;
+                            Vd_vu[i] = (zextRs1 >= vlmax) ? 0
                                : (idx < vs2_elems) ? Vs2_vu[idx]
                                : Vs3_vu[i];
                        }