diff --git a/src/mem/ruby/common/DataBlock.cc b/src/mem/ruby/common/DataBlock.cc
index f70aa79fd4..70d9bc332a 100644
--- a/src/mem/ruby/common/DataBlock.cc
+++ b/src/mem/ruby/common/DataBlock.cc
@@ -51,9 +51,19 @@ namespace ruby
 
 DataBlock::DataBlock(const DataBlock &cp)
 {
-    m_data = new uint8_t[RubySystem::getBlockSizeBytes()];
-    memcpy(m_data, cp.m_data, RubySystem::getBlockSizeBytes());
+    uint8_t *block_update;
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    m_data = new uint8_t[block_bytes];
+    memcpy(m_data, cp.m_data, block_bytes);
     m_alloc = true;
+    // If this data block is involved in an atomic operation, the effect
+    // of applying the atomic operations on the data block are recorded in
+    // m_atomicLog. If so, we must copy over every entry in the change log
+    for (size_t i = 0; i < cp.m_atomicLog.size(); i++) {
+        block_update = new uint8_t[block_bytes];
+        memcpy(block_update, cp.m_atomicLog[i], block_bytes);
+        m_atomicLog.push_back(block_update);
+    }
 }
 
 void
@@ -73,7 +83,20 @@ DataBlock::clear()
 bool
 DataBlock::equal(const DataBlock& obj) const
 {
-    return !memcmp(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    // Check that the block contents match
+    if (memcmp(m_data, obj.m_data, block_bytes)) {
+        return false;
+    }
+    if (m_atomicLog.size() != obj.m_atomicLog.size()) {
+        return false;
+    }
+    for (size_t i = 0; i < m_atomicLog.size(); i++) {
+        if (memcmp(m_atomicLog[i], obj.m_atomicLog[i], block_bytes)) {
+            return false;
+        }
+    }
+    return true;
 }
 
 void
@@ -92,7 +115,7 @@ DataBlock::atomicPartial(const DataBlock &dblk, const WriteMask &mask)
     for (int i = 0; i < RubySystem::getBlockSizeBytes(); i++) {
         m_data[i] = dblk.m_data[i];
     }
-    mask.performAtomic(m_data);
+    mask.performAtomic(m_data, m_atomicLog);
 }
 
 void
@@ -107,6 +130,28 @@ DataBlock::print(std::ostream& out) const
     out << std::dec << "]" << std::flush;
 }
 
+int
+DataBlock::numAtomicLogEntries() const
+{
+    return m_atomicLog.size();
+}
+uint8_t*
+DataBlock::popAtomicLogEntryFront()
+{
+    assert(m_atomicLog.size() > 0);
+    auto ret = m_atomicLog.front();
+    m_atomicLog.pop_front();
+    return ret;
+}
+void
+DataBlock::clearAtomicLogEntries()
+{
+    for (auto log : m_atomicLog) {
+        delete [] log;
+    }
+    m_atomicLog.clear();
+}
+
 const uint8_t*
 DataBlock::getData(int offset, int len) const
 {
@@ -137,7 +182,18 @@ DataBlock::setData(PacketPtr pkt)
 DataBlock &
 DataBlock::operator=(const DataBlock & obj)
 {
-    memcpy(m_data, obj.m_data, RubySystem::getBlockSizeBytes());
+    uint8_t *block_update;
+    size_t block_bytes = RubySystem::getBlockSizeBytes();
+    // Copy entire block contents from obj to current block
+    memcpy(m_data, obj.m_data, block_bytes);
+    // If this data block is involved in an atomic operation, the effect
+    // of applying the atomic operations on the data block are recorded in
+    // m_atomicLog. If so, we must copy over every entry in the change log
+    for (size_t i = 0; i < obj.m_atomicLog.size(); i++) {
+        block_update = new uint8_t[block_bytes];
+        memcpy(block_update, obj.m_atomicLog[i], block_bytes);
+        m_atomicLog.push_back(block_update);
+    }
     return *this;
 }
diff --git a/src/mem/ruby/common/DataBlock.hh b/src/mem/ruby/common/DataBlock.hh
index e147d701c5..aa94f56eb8 100644
--- a/src/mem/ruby/common/DataBlock.hh
+++ b/src/mem/ruby/common/DataBlock.hh
@@ -44,6 +44,7 @@
 #include <inttypes.h>
 
 #include <cassert>
+#include <deque>
 #include <iomanip>
 #include <iostream>
@@ -71,6 +72,12 @@ class DataBlock
     {
         if (m_alloc)
            delete [] m_data;
+
+        // If data block involved in atomic
+        // operations, free all meta data
+        for (auto log : m_atomicLog) {
+            delete [] log;
+        }
     }
 
     DataBlock& operator=(const DataBlock& obj);
@@ -80,6 +87,9 @@ class DataBlock
     void clear();
     uint8_t getByte(int whichByte) const;
     const uint8_t *getData(int offset, int len) const;
+    uint8_t* popAtomicLogEntryFront();
+    int numAtomicLogEntries() const;
+    void clearAtomicLogEntries();
     uint8_t *getDataMod(int offset);
     void setByte(int whichByte, uint8_t data);
     void setData(const uint8_t *data, int offset, int len);
@@ -94,6 +104,9 @@ class DataBlock
     void alloc();
     uint8_t *m_data;
     bool m_alloc;
+
+    // Tracks block changes when atomic ops are applied
+    std::deque<uint8_t*> m_atomicLog;
 };
 
 inline void
diff --git a/src/mem/ruby/common/WriteMask.cc b/src/mem/ruby/common/WriteMask.cc
index 4c24a64706..911262b4ba 100644
--- a/src/mem/ruby/common/WriteMask.cc
+++ b/src/mem/ruby/common/WriteMask.cc
@@ -55,5 +55,27 @@ WriteMask::print(std::ostream& out) const
         << std::flush;
 }
 
+void
+WriteMask::performAtomic(uint8_t * p,
+                         std::deque<uint8_t*>& log) const
+{
+    int offset;
+    uint8_t *block_update;
+    // Here, operations occur in FIFO order from the mAtomicOp
+    // vector. This is done to match the ordering of packets
+    // that was seen when the initial coalesced request was created.
+    for (int i = 0; i < mAtomicOp.size(); i++) {
+        // Save the old value of the data block in case a
+        // return value is needed
+        block_update = new uint8_t[mSize];
+        std::memcpy(block_update, p, mSize);
+        log.push_back(block_update);
+        // Perform the atomic operation
+        offset = mAtomicOp[i].first;
+        AtomicOpFunctor *fnctr = mAtomicOp[i].second;
+        (*fnctr)(&p[offset]);
+    }
+}
+
 } // namespace ruby
 } // namespace gem5
diff --git a/src/mem/ruby/common/WriteMask.hh b/src/mem/ruby/common/WriteMask.hh
index 2de21da79b..47ec798500 100644
--- a/src/mem/ruby/common/WriteMask.hh
+++ b/src/mem/ruby/common/WriteMask.hh
@@ -222,26 +222,15 @@ class WriteMask
 
     void print(std::ostream& out) const;
 
-    void
-    performAtomic(uint8_t * p) const
-    {
-        for (int i = 0; i < mAtomicOp.size(); i++) {
-            int offset = mAtomicOp[i].first;
-            AtomicOpFunctor *fnctr = mAtomicOp[i].second;
-            (*fnctr)(&p[offset]);
-        }
-    }
-
-    void
-    performAtomic(DataBlock & blk) const
-    {
-        for (int i = 0; i < mAtomicOp.size(); i++) {
-            int offset = mAtomicOp[i].first;
-            uint8_t *p = blk.getDataMod(offset);
-            AtomicOpFunctor *fnctr = mAtomicOp[i].second;
-            (*fnctr)(p);
-        }
-    }
+    /*
+     * Performs atomic operations on the data block pointed to by p. The
+     * atomic operations to perform are in the vector mAtomicOp. The
+     * effect of each atomic operation is pushed to the atomicChangeLog
+     * so that each individual atomic requestor may see the results of their
+     * specific atomic operation.
+     */
+    void performAtomic(uint8_t * p,
+                       std::deque<uint8_t*>& atomicChangeLog) const;
 
     const AtomicOpVector&
     getAtomicOps() const
diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
index 31fc484973..20a0979af1 100644
--- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
+++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm
@@ -523,6 +523,7 @@ machine(MachineType:TCC, "TCC Cache")
         out_msg.isSLCSet := in_msg.isSLCSet;
       }
     }
+    cache_entry.DataBlk.clearAtomicLogEntries();
   }
 
   action(bar_sendBypassedAtomicResponse, "bar", desc="send bypassed Atomic Ack") {
diff --git a/src/mem/ruby/protocol/RubySlicc_Exports.sm b/src/mem/ruby/protocol/RubySlicc_Exports.sm
index a32983ada4..2e496a8221 100644
--- a/src/mem/ruby/protocol/RubySlicc_Exports.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Exports.sm
@@ -77,6 +77,8 @@ structure(DataBlock, external = "yes", desc="..."){
   void copyPartial(DataBlock, int, int);
   void copyPartial(DataBlock, WriteMask);
   void atomicPartial(DataBlock, WriteMask);
+  int numAtomicLogEntries();
+  void clearAtomicLogEntries();
 }
 
 bool testAndRead(Addr addr, DataBlock datablk, Packet *pkt);
diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc
index 8bde3f7bc8..beb8da3f9c 100644
--- a/src/mem/ruby/system/GPUCoalescer.cc
+++ b/src/mem/ruby/system/GPUCoalescer.cc
@@ -554,25 +554,48 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                  success, isRegion);
     // update the data
     //
-    // MUST AD DOING THIS FOR EACH REQUEST IN COALESCER
+    // MUST ADD DOING THIS FOR EACH REQUEST IN COALESCER
     std::vector<PacketPtr> pktList = crequest->getPackets();
+
+    uint8_t* log = nullptr;
     DPRINTF(GPUCoalescer, "Responding to %d packets for addr 0x%X\n",
             pktList.size(), request_line_address);
+    uint32_t offset;
+    int pkt_size;
     for (auto& pkt : pktList) {
-        request_address = pkt->getAddr();
+        offset = getOffset(pkt->getAddr());
+        pkt_size = pkt->getSize();
         if (pkt->getPtr<uint8_t>()) {
-            if ((type == RubyRequestType_LD) ||
-                (type == RubyRequestType_ATOMIC) ||
-                (type == RubyRequestType_ATOMIC_RETURN) ||
-                (type == RubyRequestType_IFETCH) ||
-                (type == RubyRequestType_RMW_Read) ||
-                (type == RubyRequestType_Locked_RMW_Read) ||
-                (type == RubyRequestType_Load_Linked)) {
-                pkt->setData(
-                    data.getData(getOffset(request_address), pkt->getSize()));
-            } else {
-                data.setData(pkt->getPtr<uint8_t>(),
-                             getOffset(request_address), pkt->getSize());
+            switch(type) {
+              // Store and AtomicNoReturns follow the same path, as the
+              // data response is not needed.
+              case RubyRequestType_ATOMIC_NO_RETURN:
+                assert(pkt->isAtomicOp());
+              case RubyRequestType_ST:
+                data.setData(pkt->getPtr<uint8_t>(), offset, pkt_size);
+                break;
+              case RubyRequestType_LD:
+                pkt->setData(data.getData(offset, pkt_size));
+                break;
+              case RubyRequestType_ATOMIC_RETURN:
+                assert(pkt->isAtomicOp());
+                // Atomic operations are performed by the WriteMask
+                // in packet order, set by the crequest. Thus, when
+                // unpacking the changes from the log, we read from
+                // the front of the log to correctly map response
+                // data into the packets.
+
+                // Log entry contains the old value before the current
+                // atomic operation occurred.
+                log = data.popAtomicLogEntryFront();
+                pkt->setData(&log[offset]);
+                delete [] log;
+                log = nullptr;
+                break;
+              default:
+                panic("Unsupported ruby packet type:%s\n",
+                      RubyRequestType_to_string(type));
+                break;
             }
         } else {
             DPRINTF(MemoryAccess,
@@ -581,6 +604,7 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest,
                     RubyRequestType_to_string(type));
         }
     }
+    assert(data.numAtomicLogEntries() == 0);
 
     m_outstanding_count--;
     assert(m_outstanding_count >= 0);