From 943d1f1453dc5489e41f9b05e7d5ab8aef95a413 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@outlook.com>
Date: Mon, 10 Jun 2024 22:16:27 -0500
Subject: [PATCH 1/2] mem-ruby: Fix deadlock in GPU_VIPER when issuing atomic
 requests

When a compute unit issues several requests to the same line,
the requests wait in the L2 if the L2 is a writeback cache. If the line
is initially invalid and the first request is an atomic, the L2 cache
issues a request to main memory. On data return, the cache line
transitions to M but the other waiting requests are not woken up,
resulting in a deadlock. This commit adds a wakeup call on data return
for atomics, which fixes this potential deadlock.

Change-Id: I8200ce6e77da7c8b4db285c0cc8b8ca0dfa7d720
---
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index f6ac25be36..9092222a4d 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -1299,6 +1299,7 @@ machine(MachineType:TCC, "TCC Cache")
     wardb_writeAtomicResponseDirtyBytes;
     pa_performAtomic;
     baplr_sendBypassedAtomicPerformedLocallyResponse;
+    wada_wakeUpAllDependentsAddr;
     dt_deallocateTBE;
     pr_popResponseQueue;
   }

From 42b9a9666ee7b57afb8ae724d816c9a194228bd3 Mon Sep 17 00:00:00 2001
From: Vishnu Ramadas <vramadas@outlook.com>
Date: Tue, 11 Jun 2024 20:35:03 -0500
Subject: [PATCH 2/2] mem-ruby: Add instSeqNum to atomic responses from GPU L2
 caches

This commit adds instSeqNum to the atomic responses in
GPU_VIPER-TCC.sm. This will be useful when debugging issues related to
GPU atomic transactions.

Change-Id: Ic05c8e1a1cb230abfca2759b51e5603304aadaa3
---
 src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 9092222a4d..da4318bcf9 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -606,6 +606,7 @@ machine(MachineType:TCC, "TCC Cache")
           out_msg.Destination.add(in_msg.Requestor);
           out_msg.Sender := machineID;
           out_msg.MessageSize := MessageSizeType:Response_Data;
+          out_msg.instSeqNum := in_msg.instSeqNum;
           out_msg.DataBlk := cache_entry.DataBlk;
           out_msg.isGLCSet := in_msg.isGLCSet;
           out_msg.isSLCSet := in_msg.isSLCSet;