From 8375058e73dfeefe433f89c9fe00e675d9ad095a Mon Sep 17 00:00:00 2001
From: Alexandru Dutu <alexandru.dutu@amd.com>
Date: Fri, 7 Oct 2022 17:22:12 -0700
Subject: [PATCH] arch-vega: Implementing global_atomic_smin

Change-Id: Iffb366190f9e3f7ffbacde5dbb3abc97226926d4
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/64512
Reviewed-by: Matt Sinclair <mattdsinclair@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
Maintainer: Matt Sinclair <mattdsinclair@gmail.com>
---
 src/arch/amdgpu/vega/insts/instructions.cc | 53 +++++++++++++++++++++-
 src/arch/amdgpu/vega/insts/instructions.hh |  2 +
 2 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/src/arch/amdgpu/vega/insts/instructions.cc b/src/arch/amdgpu/vega/insts/instructions.cc
index 987474fbfb..e3639a5901 100644
--- a/src/arch/amdgpu/vega/insts/instructions.cc
+++ b/src/arch/amdgpu/vega/insts/instructions.cc
@@ -44972,8 +44972,59 @@ namespace VegaISA
     void
     Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
     {
-        panicUnimplemented();
+        Wavefront *wf = gpuDynInst->wavefront();
+
+        if (gpuDynInst->exec_mask.none()) {
+            wf->decVMemInstsIssued();
+            wf->decLGKMInstsIssued();
+            return;
+        }
+
+        gpuDynInst->execUnitId = wf->execUnitId;
+        gpuDynInst->latency.init(gpuDynInst->computeUnit());
+        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
+
+        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
+        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
+
+        addr.read();
+        data.read();
+
+        calcAddr(gpuDynInst, addr, extData.SADDR, instData.OFFSET);
+
+        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+            if (gpuDynInst->exec_mask[lane]) {
+                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
+                    = data[lane];
+            }
+        }
+
+        gpuDynInst->computeUnit()->globalMemoryPipe.
+            issueRequest(gpuDynInst);
     } // execute
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SMIN::initiateAcc(GPUDynInstPtr gpuDynInst)
+    {
+        initAtomicAccess<VecElemU32>(gpuDynInst);
+    } // initiateAcc
+
+    void
+    Inst_FLAT__FLAT_ATOMIC_SMIN::completeAcc(GPUDynInstPtr gpuDynInst)
+    {
+        if (isAtomicRet()) {
+            VecOperandU32 vdst(gpuDynInst, extData.VDST);
+
+            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
+                if (gpuDynInst->exec_mask[lane]) {
+                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
+                        gpuDynInst->d_data))[lane];
+                }
+            }
+
+            vdst.write();
+        }
+    } // completeAcc
     // --- Inst_FLAT__FLAT_ATOMIC_UMIN class methods ---
 
     Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
diff --git a/src/arch/amdgpu/vega/insts/instructions.hh b/src/arch/amdgpu/vega/insts/instructions.hh
index ddf228a76a..8b0c8c43de 100644
--- a/src/arch/amdgpu/vega/insts/instructions.hh
+++ b/src/arch/amdgpu/vega/insts/instructions.hh
@@ -42615,6 +42615,8 @@ namespace VegaISA
         } // getOperandSize
 
         void execute(GPUDynInstPtr) override;
+        void initiateAcc(GPUDynInstPtr) override;
+        void completeAcc(GPUDynInstPtr) override;
     }; // Inst_FLAT__FLAT_ATOMIC_SMIN
 
     class Inst_FLAT__FLAT_ATOMIC_UMIN : public Inst_FLAT