From fbe698536570941f153f22a34ddb82e79f6c600f Mon Sep 17 00:00:00 2001
From: Tommaso Marinelli <Tommaso.Marinelli@imec.be>
Date: Thu, 4 Jul 2024 19:35:08 +0200
Subject: [PATCH 1/4] arch-riscv: Fix widening instructions vectors overlap
 check

This commit fixes the overlap check between VS2 and VD register groups
in vector widening instructions. While the narrowing instructions check
is correct, the widening one has to differentiate between two cases
(Vs2 EEW = 2*SEW and Vs2 EEW = SEW). In the first case, overlap is
allowed, as the EEW is the same as Vd. In the second case, the overlap
legality check has to be adapted to use the Vs2 EMUL to calculate the
boundaries. The rule has been derived again from Section 5.2 of RISC-V
"V" Vector Extension specifications, version 1.0.

The patch also includes some small code refactoring, e.g. using
already defined vlmul and constants for vector operands.

Fixes issue #442.

Change-Id: Ic87095fb9079e6c8f53b9a0d79fbf531a85dc71d
---
 src/arch/riscv/isa/formats/vector_arith.isa | 45 +++++++++++++--------
 1 file changed, 28 insertions(+), 17 deletions(-)
diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
index 3862b1de02..2333710d4f 100644
--- a/src/arch/riscv/isa/formats/vector_arith.isa
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -79,38 +79,45 @@ let {{
             uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx;
             ''' + code
 
-    def wideningOpRegisterConstraintChecks(code):
-        return '''
-            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+    def wideningOpRegisterConstraintChecks(code, src2_dw):
+        constraint_checks = '''
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vlmul + 1);
             if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) {
                 std::string error =
                     csprintf("Unaligned Vd group in Widening op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-            if ((machInst.vs2 <= machInst.vd) && (machInst.vd < (machInst.vs2 + num_microops - 1))) {
-                // A destination vector register group can overlap a source vector
-                // register group if The destination EEW is greater than the source
-                // EEW, the source EMUL is at least 1, and the overlap is in the
-                // highest- numbered part of the destination register group.
+            '''
+        if not src2_dw:
+            constraint_checks += '''
+            if (((vlmul < 0) && (VS2 == VD)) ||
+                ((vlmul >= 0) && (VS2 < VD + num_microops - (1 << vlmul)) &&
+                 (VD < VS2 + (1 << vlmul)))) {
+                // A destination vector register group can overlap a source
+                // vector register group if the destination EEW is greater than
+                // the source EEW, the source EMUL is at least 1, and the
+                // overlap is in the highest- numbered part of the destination
+                // register group.
                 std::string error =
                     csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-            ''' + code
+            '''
+        return constraint_checks + code
 
     def narrowingOpRegisterConstraintChecks(code):
         return '''
-            const uint32_t num_microops = 1 << std::max<int64_t>(0, vtype_vlmul(machInst.vtype8) + 1);
+            const uint32_t num_microops = 1 << std::max<int64_t>(0, vlmul + 1);
             if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) {
                 std::string error =
                     csprintf("Unaligned VS2 group in Narrowing op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-            if ((machInst.vs2 < machInst.vd) && (machInst.vd <= (VS2 + num_microops - 1))) {
-                // A destination vector register group can overlap a source vector
-                // register group The destination EEW is smaller than the source EEW
-                // and the overlap is in the lowest-numbered part of the source
-                // register group
+            if ((VS2 < VD) && (VD <= (VS2 + num_microops - 1))) {
+                // A destination vector register group can overlap a source
+                // vector register group if the destination EEW is smaller than
+                // the source EEW and the overlap is in the lowest-numbered
+                // part of the source register group
                 std::string error =
                     csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
@@ -329,10 +336,12 @@ def format VectorIntWideningFormat(code, category, *flags) {{
     else:
         error("not supported category for VectorIntFormat: %s" % category)
     src2_reg_id = ""
+    src2_dw = False
     if inst_suffix in ["vv", "vx"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
     elif inst_suffix in ["wv", "wx"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+        src2_dw = True
 
     set_dest_reg_idx = setDestWrapper(dest_reg_id)
 
@@ -355,7 +364,7 @@ def format VectorIntWideningFormat(code, category, *flags) {{
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
 
-    code = wideningOpRegisterConstraintChecks(code)
+    code = wideningOpRegisterConstraintChecks(code, src2_dw)
 
     vm_decl_rd = ""
     if v0_required:
@@ -746,10 +755,12 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     else:
         error("not supported category for VectorFloatFormat: %s" % category)
     src2_reg_id = ""
+    src2_dw = False
     if inst_suffix in ["vv", "vf"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
     elif inst_suffix in ["wv", "wf"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+        src2_dw = True
 
     set_dest_reg_idx = setDestWrapper(dest_reg_id)
 
@@ -773,7 +784,7 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     code = loopWrapper(code)
     code = fflags_wrapper(code)
 
-    code = wideningOpRegisterConstraintChecks(code)
+    code = wideningOpRegisterConstraintChecks(code, src2_dw)
 
     vm_decl_rd = ""
     if v0_required:

From 5b693fd8b6c28683b2c43a73d1a58a4900b27538 Mon Sep 17 00:00:00 2001
From: Tommaso Marinelli <Tommaso.Marinelli@imec.be>
Date: Tue, 9 Jul 2024 03:39:53 +0200
Subject: [PATCH 2/4] arch-riscv: Remove duplicate line

Change-Id: I32200aad5a59c9fd85f6ed783a4cebb841bf6ff1
---
 src/arch/riscv/isa/formats/vector_arith.isa | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
index 2333710d4f..e6165000d1 100644
--- a/src/arch/riscv/isa/formats/vector_arith.isa
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -553,7 +553,6 @@ def format VectorGatherFormat(code, category, *flags) {{
     else:
         error("not supported category for VectorIntFormat: %s" % category)
     src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]"
-    src2_reg_id = "vecRegClass[_machInst.vs2 + vs2_idx]"
 
     # vtmp0 as dummy src reg to create dependency with pin vd micro
     src3_reg_id = "vecRegClass[VecMemInternalReg0 + vd_idx]"

From a8b7e9727d79319f8e1c540bc7d1e1b96fe65053 Mon Sep 17 00:00:00 2001
From: Tommaso Marinelli <Tommaso.Marinelli@imec.be>
Date: Wed, 10 Jul 2024 00:14:45 +0200
Subject: [PATCH 3/4] arch-riscv: Generalize widening/narrowing vectors overlap
 check

As of now, the widening/narrowing vector register groups overlap check
always assumes a SEW multiplication factor equal to 2 (for either VD or
VS2). This commits aims at making this check more generic.

Change-Id: I4311fc3624cd324ccfdf2a1920a19efc85357120
---
 src/arch/riscv/isa/formats/vector_arith.isa | 53 +++++++++++++--------
 1 file changed, 32 insertions(+), 21 deletions(-)

diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
index e6165000d1..0b3aebe08a 100644
--- a/src/arch/riscv/isa/formats/vector_arith.isa
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -79,20 +79,25 @@ let {{
             uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx;
             ''' + code
 
-    def wideningOpRegisterConstraintChecks(code, src2_dw):
+    def wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul):
+        src2_sew_mul_bits = src2_sew_mul.bit_length() - 1
+        dest_sew_mul_bits = dest_sew_mul.bit_length() - 1
         constraint_checks = '''
-            const uint32_t num_microops = 1 << std::max<int64_t>(0, vlmul + 1);
-            if ((machInst.vd % alignToPowerOfTwo(num_microops)) != 0) {
+            const uint32_t num_microops =
+                1 << std::max<int64_t>(0, vlmul + %d);
+            if ((machInst.vd %% alignToPowerOfTwo(num_microops)) != 0) {
                 std::string error =
                     csprintf("Unaligned Vd group in Widening op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-            '''
-        if not src2_dw:
+            ''' % dest_sew_mul_bits
+        if src2_sew_mul_bits != dest_sew_mul_bits:
             constraint_checks += '''
-            if (((vlmul < 0) && (VS2 == VD)) ||
-                ((vlmul >= 0) && (VS2 < VD + num_microops - (1 << vlmul)) &&
-                 (VD < VS2 + (1 << vlmul)))) {
+            const int64_t vs2_emul = vlmul + %d;
+            if (((vs2_emul < 0) && (VS2 == VD)) ||
+                ((vs2_emul >= 0) &&
+                 (VS2 < VD + num_microops - (1 << vs2_emul)) &&
+                 (VD < VS2 + (1 << vs2_emul)))) {
                 // A destination vector register group can overlap a source
                 // vector register group if the destination EEW is greater than
                 // the source EEW, the source EMUL is at least 1, and the
@@ -102,13 +107,15 @@ let {{
                     csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-            '''
+            ''' % src2_sew_mul_bits
         return constraint_checks + code
 
-    def narrowingOpRegisterConstraintChecks(code):
+    def narrowingOpRegisterConstraintChecks(code, src2_sew_mul):
+        src2_sew_mul_bits = src2_sew_mul.bit_length() - 1
         return '''
-            const uint32_t num_microops = 1 << std::max<int64_t>(0, vlmul + 1);
-            if ((machInst.vs2 % alignToPowerOfTwo(num_microops)) != 0) {
+            const uint32_t num_microops =
+                1 << std::max<int64_t>(0, vlmul + %d);
+            if ((machInst.vs2 %% alignToPowerOfTwo(num_microops)) != 0) {
                 std::string error =
                     csprintf("Unaligned VS2 group in Narrowing op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
@@ -122,7 +129,7 @@ let {{
                     csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-        ''' + code
+        ''' % src2_sew_mul_bits + code
 
     def fflags_wrapper(code):
         return '''
@@ -328,6 +335,7 @@ def format VectorIntWideningFormat(code, category, *flags) {{
 
     old_vd_idx = 2
     dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    dest_sew_mul = 2
     src1_reg_id = ""
     if category in ["OPIVV", "OPMVV"]:
         src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
@@ -336,12 +344,12 @@ def format VectorIntWideningFormat(code, category, *flags) {{
     else:
         error("not supported category for VectorIntFormat: %s" % category)
     src2_reg_id = ""
-    src2_dw = False
+    src2_sew_mul = 1
     if inst_suffix in ["vv", "vx"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
     elif inst_suffix in ["wv", "wx"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
-        src2_dw = True
+        src2_sew_mul = 2
 
     set_dest_reg_idx = setDestWrapper(dest_reg_id)
 
@@ -364,7 +372,7 @@ def format VectorIntWideningFormat(code, category, *flags) {{
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
 
-    code = wideningOpRegisterConstraintChecks(code, src2_dw)
+    code = wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul)
 
     vm_decl_rd = ""
     if v0_required:
@@ -420,6 +428,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
     else:
         error("not supported category for VectorIntFormat: %s" % category)
     src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src2_sew_mul = 2
 
     set_dest_reg_idx = setDestWrapper(dest_reg_id)
     set_src_reg_idx = ""
@@ -432,7 +441,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
-    code = narrowingOpRegisterConstraintChecks(code)
+    code = narrowingOpRegisterConstraintChecks(code, src2_sew_mul)
     vm_decl_rd = vmDeclAndReadData()
 
     set_vlenb = setVlenb();
@@ -746,6 +755,7 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     is_destructive_fused = iop.op_class == "SimdFloatMultAccOp"
 
     dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
+    dest_sew_mul = 2
     src1_reg_id = ""
     if category in ["OPFVV"]:
         src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
@@ -754,12 +764,12 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     else:
         error("not supported category for VectorFloatFormat: %s" % category)
     src2_reg_id = ""
-    src2_dw = False
+    src2_sew_mul = 1
     if inst_suffix in ["vv", "vf"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx / 2]"
     elif inst_suffix in ["wv", "wf"]:
         src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
-        src2_dw = True
+        src2_sew_mul = 2
 
     set_dest_reg_idx = setDestWrapper(dest_reg_id)
 
@@ -783,7 +793,7 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     code = loopWrapper(code)
     code = fflags_wrapper(code)
 
-    code = wideningOpRegisterConstraintChecks(code, src2_dw)
+    code = wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul)
 
     vm_decl_rd = ""
     if v0_required:
@@ -887,6 +897,7 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
     old_vd_idx = 1
     dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]"
     src2_reg_id = "vecRegClass[_machInst.vs2 + _microIdx]"
+    src2_sew_mul = 2
 
     set_dest_reg_idx = setDestWrapper(dest_reg_id)
 
@@ -898,7 +909,7 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
     code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
     code = fflags_wrapper(code)
-    code = narrowingOpRegisterConstraintChecks(code)
+    code = narrowingOpRegisterConstraintChecks(code, src2_sew_mul)
 
     vm_decl_rd = vmDeclAndReadData()
 

From e3b41291da6528faad19556bff7db19ae38410c7 Mon Sep 17 00:00:00 2001
From: Tommaso Marinelli <Tommaso.Marinelli@imec.be>
Date: Thu, 11 Jul 2024 02:07:10 +0200
Subject: [PATCH 4/4] arch-riscv: Check VS1 group for overlap when
 widening/narrowing

Currently, only the VS2 register group is checked for overlap with VD
when executing a widening/narrowing instruction. This commits extends
the check to VS1, when applicable (i.e. vector-vector operations).

Change-Id: I892b7717c01e25546fb41e05afbd08fc40c60c59
---
 src/arch/riscv/isa/formats/vector_arith.isa | 96 ++++++++++++++-------
 1 file changed, 63 insertions(+), 33 deletions(-)

diff --git a/src/arch/riscv/isa/formats/vector_arith.isa b/src/arch/riscv/isa/formats/vector_arith.isa
index 0b3aebe08a..dc831f1b6d 100644
--- a/src/arch/riscv/isa/formats/vector_arith.isa
+++ b/src/arch/riscv/isa/formats/vector_arith.isa
@@ -79,7 +79,28 @@ let {{
             uint32_t ei = i + vtype_VLMAX(vtype, vlen, true) * this->microIdx;
             ''' + code
 
-    def wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul):
+    def wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul,
+        src1_is_vec):
+        def checkOverlap(vreg_name, vreg_emul):
+            check_code = '''
+            if ((({vreg_emul} < 0) && ({vreg_name} == VD)) ||
+                (({vreg_emul} >= 0) &&
+                 ({vreg_name} < VD + num_microops - (1 << {vreg_emul})) &&
+                 (VD < {vreg_name} + (1 << {vreg_emul})))) {
+                // A destination vector register group can overlap a source
+                // vector register group if the destination EEW is greater than
+                // the source EEW, the source EMUL is at least 1, and the
+                // overlap is in the highest- numbered part of the destination
+                // register group.
+                std::string error =
+                    csprintf("Unsupported overlap in {vreg_name} and VD for "
+                             "Widening op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            '''
+            check_code = check_code.replace("{vreg_name}", vreg_name)
+            check_code = check_code.replace("{vreg_emul}", vreg_emul)
+            return check_code
         src2_sew_mul_bits = src2_sew_mul.bit_length() - 1
         dest_sew_mul_bits = dest_sew_mul.bit_length() - 1
         constraint_checks = '''
@@ -92,27 +113,33 @@ let {{
             }
             ''' % dest_sew_mul_bits
         if src2_sew_mul_bits != dest_sew_mul_bits:
-            constraint_checks += '''
-            const int64_t vs2_emul = vlmul + %d;
-            if (((vs2_emul < 0) && (VS2 == VD)) ||
-                ((vs2_emul >= 0) &&
-                 (VS2 < VD + num_microops - (1 << vs2_emul)) &&
-                 (VD < VS2 + (1 << vs2_emul)))) {
-                // A destination vector register group can overlap a source
-                // vector register group if the destination EEW is greater than
-                // the source EEW, the source EMUL is at least 1, and the
-                // overlap is in the highest- numbered part of the destination
-                // register group.
-                std::string error =
-                    csprintf("Unsupported overlap in Vs2 and Vd for Widening op");
-                return std::make_shared<IllegalInstFault>(error, machInst);
-            }
-            ''' % src2_sew_mul_bits
+            constraint_checks += (
+                "const int64_t vs2_emul = vlmul + %d;" % src2_sew_mul_bits
+            )
+            constraint_checks += checkOverlap("VS2", "vs2_emul")
+        if src1_is_vec:
+            constraint_checks += checkOverlap("VS1", "vlmul")
         return constraint_checks + code
 
-    def narrowingOpRegisterConstraintChecks(code, src2_sew_mul):
+    def narrowingOpRegisterConstraintChecks(code, src2_sew_mul, src1_is_vec):
+        def checkOverlap(vreg_name):
+            check_code = '''
+            if (({vreg_name} < VD) &&
+                (VD <= ({vreg_name} + num_microops - 1))) {
+                // A destination vector register group can overlap a source
+                // vector register group if the destination EEW is smaller than
+                // the source EEW and the overlap is in the lowest-numbered
+                // part of the source register group
+                std::string error =
+                    csprintf("Unsupported overlap in {vreg_name} and VD for "
+                             "Narrowing op");
+                return std::make_shared<IllegalInstFault>(error, machInst);
+            }
+            '''
+            check_code = check_code.replace("{vreg_name}", vreg_name)
+            return check_code
         src2_sew_mul_bits = src2_sew_mul.bit_length() - 1
-        return '''
+        constraint_checks = '''
             const uint32_t num_microops =
                 1 << std::max<int64_t>(0, vlmul + %d);
             if ((machInst.vs2 %% alignToPowerOfTwo(num_microops)) != 0) {
@@ -120,16 +147,11 @@ let {{
                     csprintf("Unaligned VS2 group in Narrowing op");
                 return std::make_shared<IllegalInstFault>(error, machInst);
             }
-            if ((VS2 < VD) && (VD <= (VS2 + num_microops - 1))) {
-                // A destination vector register group can overlap a source
-                // vector register group if the destination EEW is smaller than
-                // the source EEW and the overlap is in the lowest-numbered
-                // part of the source register group
-                std::string error =
-                    csprintf("Unsupported overlap in Vs2 and Vd for Narrowing op");
-                return std::make_shared<IllegalInstFault>(error, machInst);
-            }
-        ''' % src2_sew_mul_bits + code
+        ''' % src2_sew_mul_bits
+        constraint_checks += checkOverlap("VS2")
+        if src1_is_vec:
+            constraint_checks += checkOverlap("VS1")
+        return constraint_checks + code
 
     def fflags_wrapper(code):
         return '''
@@ -337,8 +359,10 @@ def format VectorIntWideningFormat(code, category, *flags) {{
     dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
     dest_sew_mul = 2
     src1_reg_id = ""
+    src1_is_vec = False
     if category in ["OPIVV", "OPMVV"]:
         src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
+        src1_is_vec = True
     elif category in ["OPIVX", "OPMVX"]:
         src1_reg_id = "intRegClass[_machInst.rs1]"
     else:
@@ -372,7 +396,8 @@ def format VectorIntWideningFormat(code, category, *flags) {{
         code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
 
-    code = wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul)
+    code = wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul,
+        src1_is_vec)
 
     vm_decl_rd = ""
     if v0_required:
@@ -419,8 +444,10 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
 
     old_vd_idx = 2
     dest_reg_id = "vecRegClass[_machInst.vd + _microIdx / 2]"
+    src1_is_vec = False
     if category in ["OPIVV"]:
         src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
+        src1_is_vec = True
     elif category in ["OPIVX"]:
         src1_reg_id = "intRegClass[_machInst.rs1]"
     elif category == "OPIVI":
@@ -441,7 +468,7 @@ def format VectorIntNarrowingFormat(code, category, *flags) {{
     code = maskCondWrapper(code)
     code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
-    code = narrowingOpRegisterConstraintChecks(code, src2_sew_mul)
+    code = narrowingOpRegisterConstraintChecks(code, src2_sew_mul, src1_is_vec)
     vm_decl_rd = vmDeclAndReadData()
 
     set_vlenb = setVlenb();
@@ -757,8 +784,10 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     dest_reg_id = "vecRegClass[_machInst.vd + _microIdx]"
     dest_sew_mul = 2
     src1_reg_id = ""
+    src1_is_vec = False
     if category in ["OPFVV"]:
         src1_reg_id = "vecRegClass[_machInst.vs1 + _microIdx / 2]"
+        src1_is_vec = True
     elif category in ["OPFVF"]:
         src1_reg_id = "floatRegClass[_machInst.rs1]"
     else:
@@ -793,7 +822,8 @@ def format VectorFloatWideningFormat(code, category, *flags) {{
     code = loopWrapper(code)
     code = fflags_wrapper(code)
 
-    code = wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul)
+    code = wideningOpRegisterConstraintChecks(code, src2_sew_mul, dest_sew_mul,
+        src1_is_vec)
 
     vm_decl_rd = ""
     if v0_required:
@@ -909,7 +939,7 @@ def format VectorFloatNarrowingCvtFormat(code, category, *flags) {{
     code = eiDeclarePrefix(code, widening=True)
     code = loopWrapper(code)
     code = fflags_wrapper(code)
-    code = narrowingOpRegisterConstraintChecks(code, src2_sew_mul)
+    code = narrowingOpRegisterConstraintChecks(code, src2_sew_mul, False)
 
     vm_decl_rd = vmDeclAndReadData()