From b83457df0bee2cec66bf377d17b95c867d8ef025 Mon Sep 17 00:00:00 2001
From: Matthew Poremba <matthew.poremba@amd.com>
Date: Mon, 26 Dec 2022 09:54:20 -0800
Subject: [PATCH] arch-vega: Implement ds_add_u64

This instruction does an atomic add of an unsigned 64-bit data with a
VGPR and value in LDS atomically without return.

Change-Id: I6a7d6713b256607c4e69ddbdef5c83172493c077
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67075
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 49 ++++++++++++++++++++--
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 3d9808ac7c..a54f426837 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -36088,6 +36088,10 @@ namespace VegaISA
     Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
         : Inst_DS(iFmt, "ds_add_u64")
     {
+        setFlag(MemoryRef);
+        setFlag(GroupSegment);
+        setFlag(AtomicAdd);
+        setFlag(AtomicNoReturn);
     } // Inst_DS__DS_ADD_U64
 
     Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
@@ -36096,14 +36100,53 @@ namespace VegaISA
 
     // --- description from .arch file ---
     // 64b:
-    // tmp = MEM[ADDR];
     // MEM[ADDR] += DATA[0:1];
-    // RETURN_DATA[0:1] = tmp.
     void
     Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(
+                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
+        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_DS__DS_ADD_U64::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        Addr offset0 = instData.OFFSET0;
+        Addr offset1 = instData.OFFSET1;
+        Addr offset = (offset1 << 8) | offset0;
+
+        initAtomicAccess<VecElemU64>(gpuDynInst, offset);
+    } // initiateAcc
+
+    void
+    Inst_DS__DS_ADD_U64::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+    } // completeAcc
     // --- Inst_DS__DS_SUB_U64 class methods ---
 
     Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index 05a0002b25..f8fc98b647 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -33079,6 +33079,8 @@ namespace VegaISA
             }
         } // getOperandSize
 
+        void initiateAcc(GPUDynInstPtr gpuDynInst) override;
+        void completeAcc(GPUDynInstPtr gpuDynInst) override;
         void execute(GPUDynInstPtr) override;
     }; // Inst_DS__DS_ADD_U64