diff --git a/src/arch/amdgpu/vega/gpu_decoder.cc b/src/arch/amdgpu/vega/gpu_decoder.cc
index 0f4b1e9872..e07a392ced 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.cc
+++ b/src/arch/amdgpu/vega/gpu_decoder.cc
@@ -3144,7 +3144,7 @@ namespace VegaISA
         &Decoder::decode_OP_VOP1__V_SAT_PK_U8_I16,
         &Decoder::decode_invalid,
         &Decoder::decode_OP_VOP1__V_SWAP_B32,
-        &Decoder::decode_invalid,
+        &Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32,
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
         &Decoder::decode_invalid,
@@ -8622,7 +8622,6 @@ namespace VegaISA
     Decoder::decode_OP_GLOBAL__GLOBAL_STORE_DWORD(MachInst iFmt)
     {
         return new Inst_FLAT__FLAT_STORE_DWORD(&iFmt->iFmt_FLAT);
-        return nullptr;
     }
 
     GPUStaticInst*
@@ -9898,29 +9897,25 @@ namespace VegaISA
     GPUStaticInst*
     Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_UBYTE(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_FLAT__FLAT_LOAD_UBYTE(&iFmt->iFmt_FLAT); // FLAT class
     }
 
     GPUStaticInst*
     Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SBYTE(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_FLAT__FLAT_LOAD_SBYTE(&iFmt->iFmt_FLAT); // FLAT class
     }
 
     GPUStaticInst*
     Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_USHORT(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_FLAT__FLAT_LOAD_USHORT(&iFmt->iFmt_FLAT); // FLAT class
     }
 
     GPUStaticInst*
     Decoder::decode_OP_SCRATCH__SCRATCH_LOAD_SSHORT(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_FLAT__FLAT_LOAD_SSHORT(&iFmt->iFmt_FLAT); // FLAT class
     }
 
     GPUStaticInst*
@@ -9950,8 +9945,7 @@ namespace VegaISA
     GPUStaticInst*
     Decoder::decode_OP_SCRATCH__SCRATCH_STORE_BYTE(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_FLAT__FLAT_STORE_BYTE(&iFmt->iFmt_FLAT); // FLAT class
     }
 
     GPUStaticInst*
@@ -9964,8 +9958,7 @@ namespace VegaISA
     GPUStaticInst*
     Decoder::decode_OP_SCRATCH__SCRATCH_STORE_SHORT(MachInst iFmt)
     {
-        fatal("Trying to decode instruction without a class\n");
-        return nullptr;
+        return new Inst_FLAT__FLAT_STORE_SHORT(&iFmt->iFmt_FLAT); // FLAT class
     }
 
     GPUStaticInst*
@@ -11784,6 +11777,12 @@ namespace VegaISA
         return nullptr;
     }
 
+    GPUStaticInst*
+    Decoder::decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst iFmt)
+    { // returns a newly allocated instruction built from the VOP1 view
+        return new Inst_VOP1__V_ACCVGPR_MOV_B32(&iFmt->iFmt_VOP1);
+    }
+
     GPUStaticInst*
     Decoder::decode_OP_VOPC__V_CMP_CLASS_F32(MachInst iFmt)
     {
diff --git a/src/arch/amdgpu/vega/gpu_decoder.hh b/src/arch/amdgpu/vega/gpu_decoder.hh
index 8094233bd8..2523734ce5 100644
--- a/src/arch/amdgpu/vega/gpu_decoder.hh
+++ b/src/arch/amdgpu/vega/gpu_decoder.hh
@@ -1314,6 +1314,7 @@ namespace VegaISA
         GPUStaticInst* decode_OP_VOP1__V_CVT_NORM_U16_F16(MachInst);
         GPUStaticInst* decode_OP_VOP1__V_SAT_PK_U8_I16(MachInst);
         GPUStaticInst* decode_OP_VOP1__V_SWAP_B32(MachInst);
+        GPUStaticInst* decode_OP_VOP1__V_ACCVGPR_MOV_B32(MachInst);
         GPUStaticInst* decode_OP_VOP2__V_CNDMASK_B32(MachInst);
         GPUStaticInst* decode_OP_VOP2__V_ADD_F32(MachInst);
         GPUStaticInst* decode_OP_VOP2__V_SUB_F32(MachInst);
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 9d91526f3f..4e71f13ad4 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -10562,6 +10562,38 @@ namespace VegaISA
         void execute(GPUDynInstPtr) override;
     }; // Inst_VOP1__V_LOG_LEGACY_F32
 
+    class Inst_VOP1__V_ACCVGPR_MOV_B32 : public Inst_VOP1
+    { // VOP1 instruction with one source and one destination operand
+      public:
+        Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1*);
+        ~Inst_VOP1__V_ACCVGPR_MOV_B32();
+
+        int
+        getNumOperands() override
+        { // total operand count: destinations plus sources
+            return numDstRegOperands() + numSrcRegOperands();
+        } // getNumOperands
+
+        int numDstRegOperands() override { return 1; } // vdst
+        int numSrcRegOperands() override { return 1; } // src
+
+        int
+        getOperandSize(int opIdx) override
+        { // size of operand opIdx; both operands report the same value
+            switch (opIdx) {
+              case 0: // src
+                return 4; // presumably bytes (one 32-bit dword); confirm
+              case 1: // vdst
+                return 4;
+              default: // any other index is reported through fatal()
+                fatal("op idx %i out of bounds\n", opIdx);
+                return -1; // reached only if fatal() returns
+            }
+        } // getOperandSize
+
+        void execute(GPUDynInstPtr) override; // defined in insts/vop1.cc
+    }; // Inst_VOP1__V_ACCVGPR_MOV_B32
+
     class Inst_VOPC__V_CMP_CLASS_F32 : public Inst_VOPC
     {
       public:
diff --git a/src/arch/amdgpu/vega/insts/vop1.cc b/src/arch/amdgpu/vega/insts/vop1.cc
index 3bbf1e0085..f970923951 100644
--- a/src/arch/amdgpu/vega/insts/vop1.cc
+++ b/src/arch/amdgpu/vega/insts/vop1.cc
@@ -2397,6 +2397,38 @@ namespace VegaISA
             }
         }
 
+        vdst.write();
+    } // execute
+    // --- Inst_VOP1__V_ACCVGPR_MOV_B32 class methods ---
+
+    Inst_VOP1__V_ACCVGPR_MOV_B32::
+        Inst_VOP1__V_ACCVGPR_MOV_B32(InFmt_VOP1 *iFmt)
+        : Inst_VOP1(iFmt, "v_accvgpr_mov_b32") // passes iFmt + string to base
+    {
+        setFlag(ALU); // mark this instruction with the ALU flag
+    } // Inst_VOP1__V_ACCVGPR_MOV_B32
+
+    Inst_VOP1__V_ACCVGPR_MOV_B32::~Inst_VOP1__V_ACCVGPR_MOV_B32()
+    { // empty body: this destructor performs no work of its own
+    } // ~Inst_VOP1__V_ACCVGPR_MOV_B32
+
+    void
+    Inst_VOP1__V_ACCVGPR_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
+    { // copy src to vdst per lane; both indices are shifted by accumOffset
+        Wavefront *wf = gpuDynInst->wavefront();
+        unsigned accum_offset = wf->accumOffset; // wavefront-supplied offset
+
+        ConstVecOperandU32 src(gpuDynInst, instData.SRC0+accum_offset);
+        VecOperandU32 vdst(gpuDynInst, instData.VDST+accum_offset);
+
+        src.readSrc(); // fetch source values before the per-lane copy
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (wf->execMask(lane)) { // only lanes enabled in the exec mask
+                vdst[lane] = src[lane];
+            }
+        }
+
         vdst.write();
     } // execute
 } // namespace VegaISA
diff --git a/src/arch/amdgpu/vega/operand.hh b/src/arch/amdgpu/vega/operand.hh
index 593f0e34fd..1bb9b43d1f 100644
--- a/src/arch/amdgpu/vega/operand.hh
+++ b/src/arch/amdgpu/vega/operand.hh
@@ -579,8 +579,30 @@ namespace VegaISA
               case REG_SRC_SWDA:
               case REG_SRC_DPP:
               case REG_SRC_LITERAL:
-                assert(NumDwords == 1);
+                /**
+                 * From the Vega specification:
+                 * When a literal constant is used with a 64 bit instruction,
+                 * the literal is expanded to 64 bits by: padding the LSBs
+                 * with zeros for floats, padding the MSBs with zeros for
+                 * unsigned ints, and by sign-extending signed ints.
+                 */
                 srfData[0] = _gpuDynInst->srcLiteral();
+                if constexpr (NumDwords == 2) {
+                    if constexpr (std::is_integral_v<DataType>) {
+                        if constexpr (std::is_signed_v<DataType>) {
+                            if (bits(srfData[0], 31, 31) == 1) {
+                                srfData[1] = 0xffffffff;
+                            } else {
+                                srfData[1] = 0;
+                            }
+                        } else {
+                            srfData[1] = 0;
+                        }
+                    } else {
+                        srfData[1] = _gpuDynInst->srcLiteral();
+                        srfData[0] = 0;
+                    }
+                }
                 break;
               case REG_SHARED_BASE:
                 {