diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc index 0d3f2dc00b..ab9c1cecf2 100644 --- a/src/arch/amdgpu/vega/insts/instructions.cc +++ b/src/arch/amdgpu/vega/insts/instructions.cc @@ -6394,7 +6394,7 @@ namespace VegaISA } }; - vop2Helper(gpuDynInst, opImpl); + vop2Helper(gpuDynInst, opImpl); } // execute // --- Inst_VOP2__V_MUL_HI_U32_U24 class methods --- diff --git a/src/arch/amdgpu/vega/insts/op_encodings.hh b/src/arch/amdgpu/vega/insts/op_encodings.hh index f1954723af..0f5f502add 100644 --- a/src/arch/amdgpu/vega/insts/op_encodings.hh +++ b/src/arch/amdgpu/vega/insts/op_encodings.hh @@ -339,7 +339,7 @@ namespace VegaISA return src0_dpp; } - template + template void vop2Helper(GPUDynInstPtr gpuDynInst, void (*fOpImpl)(T&, T&, T&, Wavefront*)) { @@ -359,7 +359,19 @@ namespace VegaISA T src0_dpp = dppHelper(gpuDynInst, src1); fOpImpl(src0_dpp, src1, vdst, wf); } else { - fOpImpl(src0, src1, vdst, wf); + // src0 is unmodified. We need to use the const container + // type to allow reading scalar operands from src0. Only + // src0 can index scalar operands. We copy this to vdst + // temporarily to pass to the lambda so the instruction + // does not need to write two lambda functions (one for + // a const src0 and one for a mutable src0). + ConstT const_src0(gpuDynInst, instData.SRC0); + const_src0.readSrc(); + + for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) { + vdst[lane] = const_src0[lane]; + } + fOpImpl(vdst, src1, vdst, wf); } vdst.write();