From 6695e5ef70a3fd08b69e04344945571d04165019 Mon Sep 17 00:00:00 2001
From: Mahyar Samani <msamani@ucdavis.edu>
Date: Tue, 14 May 2024 17:35:15 -0700
Subject: [PATCH 1/2] cpu: Adding SpatterGen

This change adds source code for the SpatterGen ClockedObject.
The source code includes SpatterKernel, which tracks whether
data is being gathered or scattered as well as the list of
indices to be accessed. The model exposes PyBindMethods to add
SpatterKernels from Python, so all the preparation for kernels
can be done in Python.
SpatterGen has a few parameters that model limits on a few
hardware resources in the backend of a processor, e.g. the number
of functional units that calculate effective addresses, the latency
of calculating an effective address, and the number of integer
registers.

Change-Id: I451ffb385180a914e884cab220928c5f1944b2e3
---
 src/cpu/testers/spatter_gen/SConscript        |  38 ++
 src/cpu/testers/spatter_gen/SpatterGen.py     | 115 ++++
 src/cpu/testers/spatter_gen/spatter_gen.cc    | 582 ++++++++++++++++++
 src/cpu/testers/spatter_gen/spatter_gen.hh    | 252 ++++++++
 .../testers/spatter_gen/utility_structs.hh    | 242 ++++++++
 5 files changed, 1229 insertions(+)
 create mode 100644 src/cpu/testers/spatter_gen/SConscript
 create mode 100644 src/cpu/testers/spatter_gen/SpatterGen.py
 create mode 100644 src/cpu/testers/spatter_gen/spatter_gen.cc
 create mode 100644 src/cpu/testers/spatter_gen/spatter_gen.hh
 create mode 100644 src/cpu/testers/spatter_gen/utility_structs.hh

diff --git a/src/cpu/testers/spatter_gen/SConscript b/src/cpu/testers/spatter_gen/SConscript
new file mode 100644
index 0000000000..86231409dd
--- /dev/null
+++ b/src/cpu/testers/spatter_gen/SConscript
@@ -0,0 +1,38 @@
+# Copyright (c) 2024 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Import("*")
+# SpatterGen.py defines the SimObject class and both enums named below.
+SimObject(
+    "SpatterGen.py",
+    sim_objects=["SpatterGen"],
+    enums=["SpatterKernelType", "SpatterProcessingMode"],
+)
+# spatter_gen.cc includes a debug/<flag>.hh header for each flag below.
+Source("spatter_gen.cc")
+
+DebugFlag("SpatterGen")
+DebugFlag("SpatterKernel")
diff --git a/src/cpu/testers/spatter_gen/SpatterGen.py b/src/cpu/testers/spatter_gen/SpatterGen.py
new file mode 100644
index 0000000000..1c88f867ca
--- /dev/null
+++ b/src/cpu/testers/spatter_gen/SpatterGen.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2024 The Regents of The University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from m5.citations import add_citation
+from m5.objects.ClockedObject import ClockedObject
+from m5.params import *
+from m5.proxy import *
+from m5.util.pybind import PyBindMethod
+
+
+class SpatterKernelType(Enum):
+    vals = ["scatter", "gather"]  # scatter writes values; gather reads them
+
+
+class SpatterProcessingMode(Enum):
+    vals = ["synchronous", "asynchronous"]  # sync on kernel boundaries or not
+
+
+class SpatterGen(ClockedObject):
+    type = "SpatterGen"
+    cxx_header = "cpu/testers/spatter_gen/spatter_gen.hh"
+    cxx_class = "gem5::SpatterGen"
+
+    system = Param.System(Parent.any, "System this SpatterGen is a part of.")
+
+    processing_mode = Param.SpatterProcessingMode(
+        "How to process kernels across multiple SpatterGen cores. "
+        "Whether to synchronize on kernel boundaries or not."
+    )
+
+    port = RequestPort("Port to send memory requests.")
+    # Resource limits; fp_regfile_size must be >= request_buffer_entries.
+    int_regfile_size = Param.Int("Size of the integer register file.")
+    fp_regfile_size = Param.Int("Size of the floating point register file.")
+    request_gen_latency = Param.Cycles(
+        "Number of cycles to spend for creating a request."
+    )
+    request_gen_rate = Param.Int("Number of requests generated per cycle.")
+    request_buffer_entries = Param.Int("Size of the request buffer.")
+    send_rate = Param.Int(
+        "Number of requests to send in parallel. "
+        "Emulates the number of dcache ports."
+    )
+    # C++ methods of gem5::SpatterGen exposed to Python through pybind.
+    cxx_exports = [
+        PyBindMethod("addKernel"),
+        PyBindMethod("proceedPastSyncPoint"),
+    ]
+
+
+add_citation(
+    SpatterGen,
+    """@inproceedings{10.1145/3422575.3422794,
+author = {Lavin, Patrick and Young, Jeffrey and Vuduc, Richard and Riedy,
+Jason and Vose, Aaron and Ernst, Daniel},
+title = {Evaluating Gather and Scatter Performance on CPUs and GPUs},
+year = {2021},
+isbn = {9781450388993},
+publisher = {Association for Computing Machinery},
+address = {New York, NY, USA},
+url = {https://doi.org/10.1145/3422575.3422794},
+doi = {10.1145/3422575.3422794},
+abstract = {This paper describes a new benchmark tool,
+Spatter, for assessing memory system architectures in the context of a
+specific category of indexed accesses known as gather and scatter.
+These types of operations are increasingly used to express sparse and
+irregular data access patterns, and they have widespread utility in many
+modern HPC applications including scientific simulations, data mining and
+analysis computations, and graph processing. However, many traditional
+benchmarking tools like STREAM, STRIDE, and GUPS focus on characterizing
+only uniform stride or fully random accesses despite evidence that modern
+applications use varied sets of more complex access patterns. Spatter is an
+open-source benchmark that provides a tunable and configurable framework to
+benchmark a variety of indexed access patterns, including variations of gather
+/ scatter that are seen in HPC mini-apps evaluated in this work. The design of
+Spatter includes backends for OpenMP and CUDA, and experiments show how it can
+be used to evaluate 1) uniform access patterns for CPU and GPU, 2) prefetching
+regimes for gather / scatter, 3) compiler implementations of vectorization for
+gather / scatter, and 4) trace-driven “proxy patterns” that reflect the
+patterns found in multiple applications. The results from Spatter experiments
+show, for instance, that GPUs typically outperform CPUs for these operations
+in absolute bandwidth but not fraction of peak bandwidth, and that Spatter can
+better represent the performance of some cache-dependent mini-apps than
+traditional STREAM bandwidth measurements.},
+booktitle = {Proceedings of the International Symposium on Memory Systems},
+pages = {209–222},
+numpages = {14},
+location = {Washington, DC, USA},
+series = {MEMSYS '20}
+}
+""",
+)
diff --git a/src/cpu/testers/spatter_gen/spatter_gen.cc b/src/cpu/testers/spatter_gen/spatter_gen.cc
new file mode 100644
index 0000000000..b57259911b
--- /dev/null
+++ b/src/cpu/testers/spatter_gen/spatter_gen.cc
@@ -0,0 +1,582 @@
+/*
+* Copyright (c) 2024 The Regents of The University of California
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met: redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer;
+* redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution;
+* neither the name of the copyright holders nor the names of its
+* contributors may be used to endorse or promote products derived from
+* this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "cpu/testers/spatter_gen/spatter_gen.hh"
+
+#include "base/cprintf.hh"
+#include "debug/SpatterGen.hh"
+#include "debug/SpatterKernel.hh"
+#include "enums/SpatterKernelType.hh"
+#include "enums/SpatterProcessingMode.hh"
+#include "mem/packet.hh"
+#include "sim/sim_exit.hh"
+#include "sim/system.hh"
+
+namespace gem5
+{
+
+using enums::SpatterKernelTypeStrings;
+using enums::SpatterProcessingMode;
+
+SpatterGen::SpatterGen(const Params& params):
+    ClockedObject(params),
+    state(SpatterGenState::RUNNING),  // generation is allowed from the start
+    requestorId(params.system->getRequestorId(this)),
+    numPendingMemRequests(0),
+    stats(this),
+    mode(params.processing_mode),
+    port(this, name() + ".port"),
+    intRegFileSize(params.int_regfile_size), intRegUsed(0),
+    fpRegFileSize(params.fp_regfile_size), fpRegUsed(0),
+    requestGenLatency(params.request_gen_latency),
+    requestGenRate(params.request_gen_rate),
+    firstGeneratorAvailableTime(0),
+    nextGenEvent([this](){ processNextGenEvent(); }, name() + ".GenEvent"),
+    requestBufferEntries(params.request_buffer_entries),
+    requestBuffer(clockPeriod()),
+    sendRate(params.send_rate),
+    firstPortAvailableTime(0),
+    nextSendEvent([this](){ processNextSendEvent(); }, name() + ".SendEvent"),
+    receiveBuffer(clockPeriod())
+{
+    fatal_if(fpRegFileSize < requestBufferEntries,
+            "fp_regfile_size should be >= request_buffer_entries. "
+            "If request_buffer_entries is bigger than fp_regfile_size, "
+            "it may result in inaccuracies in your simulation. "
+            "Ideally: fp_regfile_size >> request_buffer_entries."
+    );
+    generatorBusyUntil.resize(requestGenRate, 0);  // one entry per AGU
+    portBusyUntil.resize(sendRate, 0);  // one entry per send port
+}
+
+Port&
+SpatterGen::getPort(const std::string& if_name, PortID idx)
+{
+    // "port" is the only port owned here; the base class resolves the rest.
+    if (if_name != "port") {
+        return ClockedObject::getPort(if_name, idx);
+    }
+    return port;
+}
+
+void
+SpatterGen::startup()
+{
+    scheduleNextGenEvent(curTick());  // no-op unless work is available
+}
+
+void
+SpatterGen::SpatterGenPort::sendPacket(PacketPtr pkt)
+{
+    panic_if(blocked(), "Should never try to send if port is blocked.");
+    // Nothing more to do when the peer accepts the request right away.
+    if (sendTimingReq(pkt)) {
+        return;
+    }
+    // Rejected: hold on to the packet until recvReqRetry() resends it.
+    blockedPacket = pkt;
+    DPRINTF(SpatterGen, "%s: Port blocked when sending %s.\n",
+            __func__, pkt->print());
+}
+
+void
+SpatterGen::SpatterGenPort::recvReqRetry()
+{
+    DPRINTF(SpatterGen, "%s: Port received a ReqRetry.\n", __func__);
+    panic_if(blockedPacket == nullptr,
+            "Received reqRetry with no blocked packet.");
+    // Resend the held packet; on success unblock and notify the owner.
+    if (sendTimingReq(blockedPacket)) {
+        blockedPacket = nullptr;
+        owner->recvReqRetry();
+        return;
+    }
+    // Still rejected: keep the packet and wait for the next retry.
+    DPRINTF(
+        SpatterGen,
+        "%s: Port blocked when sending %s.\n",
+        __func__, blockedPacket->print()
+    );
+}
+
+void
+SpatterGen::recvReqRetry()
+{
+    if (!nextSendEvent.pending())
+        return;  // send event is not pending, so there is nothing to wake
+    nextSendEvent.wake();
+    scheduleNextSendEvent(nextCycle());
+}
+
+bool
+SpatterGen::SpatterGenPort::recvTimingResp(PacketPtr pkt) {
+    return owner->recvTimingResp(pkt);  // handled by the owning SpatterGen
+}
+
+bool
+SpatterGen::recvTimingResp(PacketPtr pkt)
+{
+    DPRINTF(SpatterGen, "%s: Received pkt: %s.\n", __func__, pkt->print());
+    assert(pkt->isResponse());
+    // NOTE(review): spatter_access is used below without a null check.
+    // record trip time, measured from the tick stored at send time.
+    SpatterAccess* spatter_access = pkt->findNextSenderState<SpatterAccess>();
+    Tick trip_time = (curTick() - requestDepartureTime[pkt->req]);
+    requestDepartureTime.erase(pkt->req);
+    spatter_access->recordTripTime(trip_time);
+
+    int trips_left = spatter_access->tripsLeft();
+    assert(trips_left >= 0);
+    if (trips_left > 0) {  // index read; the access continues later
+        stats.numIndexReads++;
+        stats.indexBytesRead += pkt->getSize();
+        stats.totalIndexReadLatency += trip_time;
+
+        stats.indexAccessLatency.sample(trip_time);
+        receiveBuffer.push(spatter_access, curTick());
+    } else {  // last trip: the value access itself has completed
+        stats.valueAccessLatency.sample(trip_time);
+        stats.totalIndirectAccessLatency.sample(
+                                            spatter_access->tripTimeSoFar()
+                                            );
+        if (spatter_access->type() == SpatterKernelType::gather) {
+            stats.numValueReads++;
+            stats.valueBytesRead += pkt->getSize();
+            stats.totalValueReadLatency += trip_time;
+        } else if (spatter_access->type() == SpatterKernelType::scatter) {
+            stats.numValueWrites++;
+            stats.valueBytesWritten += pkt->getSize();
+            stats.totalValueWriteLatency += trip_time;
+        } else {
+            panic("Unknown kernel type.");
+        }
+        // CAUTION: We're going to decrement fpRegUsed here,
+        // it could cause inaccuracies if processNextGenEvent
+        // is called after recvTimingResp on the same tick.
+        // i.e. we might end up releasing a register on the same
+        // cycle that we are allocating it.
+        // it's probably not going to ever be an issue since
+        // fpRegFileSize is probably >> requestBufferEntries
+        // i.e. the chances of running out of fp registers is low because
+        // we do not simulate parts of the pipeline that back things up into
+        // fp registers, e.g. functional units of ALU.
+        fpRegUsed--;
+        delete spatter_access;
+    }
+
+    // delete the pkt since we don't need it anymore.
+    delete pkt;
+
+    if (!nextGenEvent.pending()) {
+        scheduleNextGenEvent(nextCycle());
+    }
+
+    numPendingMemRequests--;  // this response is no longer in flight
+    checkForSimExit();
+    return true;
+}
+
+void
+SpatterGen::addKernel(
+    uint32_t id, uint32_t delta, uint32_t count,
+    SpatterKernelType type,
+    size_t index_size, Addr base_index_addr,
+    size_t value_size, Addr base_value_addr,
+    const std::vector<uint32_t>& indices
+)
+{
+    DPRINTF(
+        SpatterGen,
+        "%s: Adding kernel with id: %d, delta: %d, count: %d, type: %s.\n",
+        __func__, id, delta, count, SpatterKernelTypeStrings[type]
+    );
+    // Build the kernel description, attach its index list, then add it
+    // to the kernels that processNextGenEvent() draws accesses from.
+    SpatterKernel kernel(
+        requestorId, id, delta, count, type,
+        index_size, base_index_addr, value_size, base_value_addr
+    );
+    kernel.setIndices(indices);
+    kernels.push(kernel);
+}
+
+void
+SpatterGen::proceedPastSyncPoint()
+{
+    assert(mode == SpatterProcessingMode::synchronous);  // sync mode only
+    assert(state == SpatterGenState::WAITING);  // set by checkForSimExit()
+    state = SpatterGenState::RUNNING;
+    scheduleNextGenEvent(nextCycle());
+}
+
+void
+SpatterGen::checkForSimExit()
+{
+    const int free_int = intRegFileSize - intRegUsed;
+    const int free_fp = fpRegFileSize - fpRegUsed;
+    // Still busy if any kind of access could be generated right now ...
+    const bool can_generate =
+        initAccessOk(free_int, free_fp, curTick()) ||
+        interAccessOk(free_int, free_fp, curTick()) ||
+        ultAccessOk(free_int, free_fp, curTick());
+    // ... or if requests are in flight or waiting in the request buffer.
+    const bool in_flight = numPendingMemRequests != 0;
+    const bool queued = !requestBuffer.empty();
+    if (can_generate || in_flight || queued) {
+        return;
+    }
+    // Synchronous mode may only get here while DRAINING.
+    assert(mode == SpatterProcessingMode::asynchronous ||
+            ((mode == SpatterProcessingMode::synchronous) &&
+            (state == SpatterGenState::DRAINING)));
+    state = SpatterGenState::WAITING;
+    exitSimLoop(
+        csprintf("%s received all expected responses.", name()),
+        0, nextCycle()
+    );
+}
+
+bool
+SpatterGen::initAccessOk(int int_regs, int fp_regs, Tick when) const
+{
+    // Starting a new indirect access requires all of the following:
+    //  - the generator is RUNNING (DRAINING and WAITING block new
+    //    accesses, which is how synchronous mode pauses at a kernel end),
+    //  - at least one kernel is still queued, and
+    //  - an integer register is available.
+    // fp_regs and when are not consulted by this check.
+    const bool running = (state == SpatterGenState::RUNNING);
+    return !kernels.empty() && running && (int_regs > 0);
+}
+
+bool
+SpatterGen::interAccessOk(int int_regs, int fp_regs, Tick when) const
+{
+    // front() is only inspected once hasReady() reports a ready entry.
+    if (!receiveBuffer.hasReady(when))
+        return false;
+    return (receiveBuffer.front()->tripsLeft() > 1) && (int_regs > 0);
+}
+
+bool
+SpatterGen::ultAccessOk(int int_regs, int fp_regs, Tick when) const
+{
+    // front() is only inspected once hasReady() reports a ready entry.
+    if (!receiveBuffer.hasReady(when))
+        return false;
+    return (receiveBuffer.front()->tripsLeft() == 1) && (fp_regs > 0);
+}
+
+void
+SpatterGen::scheduleNextGenEvent(Tick when)
+{
+    const int free_int = intRegFileSize - intRegUsed;
+    const int free_fp = fpRegFileSize - fpRegUsed;
+    // Work exists if any of the three access kinds can be generated now.
+    const bool idle = !initAccessOk(free_int, free_fp, curTick()) &&
+                    !interAccessOk(free_int, free_fp, curTick()) &&
+                    !ultAccessOk(free_int, free_fp, curTick());
+    if (idle || nextGenEvent.scheduled())
+        return;
+    schedule(nextGenEvent, std::max(when, firstGeneratorAvailableTime));
+    firstGeneratorAvailableTime = MaxTick;
+}
+
+void
+SpatterGen::processNextGenEvent()
+{
+    assert(!nextGenEvent.pending());  // must not run while put to sleep
+    int req_buf_before = requestBuffer.size();
+    // track changes to intRegUsed in this variable and apply it
+    // at the end of the for loop. This way if we free a register
+    // in the for loop, other iterations of the for loop won't
+    // observe this change. This matches what happens in real h/w.
+    int int_used_now = 0;
+    // track this independently to prevent different iterations inside
+    // for loop observing change to h/w resources, i.e we can't rely
+    // intRegFileSize - intRegUsed to see if we have registers to allocate
+    // since they don't change until after the for loop
+    int int_regs_now = intRegFileSize - intRegUsed;
+    // same explanation as int_used_now
+    int fp_used_now = 0;
+    // same explanation as int_regs_now
+    int fp_regs_now = fpRegFileSize - fpRegUsed;
+    for (int i = 0; i < requestGenRate; i++) {
+        if (generatorBusyUntil[i] > curTick()) {
+            DPRINTF(
+                SpatterGen,
+                "%s: AGU[%d] is busy this cycle.\n", __func__, i
+            );
+            continue;
+        }
+        if (!(requestBuffer.size() < requestBufferEntries)) {
+            // if no space left in the requestBuffer sleep
+            // whoever pops from requestBuffer wakes us up.
+            nextGenEvent.sleep();
+            break;
+        }
+        // Now we know that AGU[i] is available and there is room
+        // in the requestBuffer to put the packet.
+        if (ultAccessOk(int_regs_now, fp_regs_now, curTick())) {
+            // occupy one fp register
+            fp_regs_now--;
+            fp_used_now++;
+            // make AGU busy for the next requestGenLatency cycles.
+            generatorBusyUntil[i] = clockEdge(Cycles(requestGenLatency));
+
+            // create a new packet to access
+            SpatterAccess* spatter_access = receiveBuffer.front();
+            PacketPtr pkt = spatter_access->nextPacket();
+            pkt->pushSenderState(spatter_access);
+
+            // push to requestBuffer
+            requestBuffer.push(pkt, curTick());
+            DPRINTF(
+                SpatterGen,
+                "%s: Pushed pkt: %s to requestBuffer.\n",
+                __func__, pkt->print()
+            );
+
+            // now release the int register that was held for the index
+            int_used_now--;
+            receiveBuffer.pop();
+        } else if (interAccessOk(int_regs_now, fp_regs_now, curTick())) {
+            // occupy one int register
+            int_regs_now--;
+            int_used_now++;
+            // make AGU busy for the next requestGenLatency cycles.
+            generatorBusyUntil[i] = clockEdge(Cycles(requestGenLatency));
+
+            // create a new packet to access
+            SpatterAccess* spatter_access = receiveBuffer.front();
+            PacketPtr pkt = spatter_access->nextPacket();
+            pkt->pushSenderState(spatter_access);
+
+            // push to requestBuffer
+            requestBuffer.push(pkt, curTick());
+            DPRINTF(
+                SpatterGen,
+                "%s: Pushed pkt: %s to requestBuffer.\n",
+                __func__, pkt->print()
+            );
+
+            // release the int register of the index that was just consumed
+            int_used_now--;
+            receiveBuffer.pop();
+        } else if (initAccessOk(int_regs_now, fp_regs_now, curTick())) {
+            // occupy one int register
+            int_regs_now--;
+            int_used_now++;
+            generatorBusyUntil[i] = clockEdge(Cycles(requestGenLatency));
+
+            SpatterKernel& front = kernels.front();
+            SpatterAccess* spatter_access = front.nextSpatterAccess();
+            PacketPtr pkt = spatter_access->nextPacket();
+            pkt->pushSenderState(spatter_access);
+
+            requestBuffer.push(pkt, curTick());
+            DPRINTF(
+                SpatterGen,
+                "%s: Pushed pkt: %s to requestBuffer.\n",
+                __func__, pkt->print()
+            );
+
+            if (front.done()) {
+                DPRINTF(
+                    SpatterKernel,
+                    "%s: Done with kernel %d type: %s.\n",
+                    __func__, front.id(),
+                    SpatterKernelTypeStrings[front.type()]
+                );
+                kernels.pop();
+                // If we're processing synchronously we now have to stop
+                // making initial accesses and wait for everyone to receive
+                // all expected responses.
+                if (mode == SpatterProcessingMode::synchronous) {
+                    state = SpatterGenState::DRAINING;
+                }
+            }
+        } else {
+            // no access of any kind can be generated: report and stop.
+            DPRINTF(
+                SpatterGen,
+                "%s: Nothing more could be done this cycle.\n", __func__
+                );
+            DPRINTF(SpatterGen, "%s: Here is h/w status report: "
+                "{KERNELS_REMAIN: %d, INDEXES_REMAIN: %d, INT_REG_USED: %d, "
+                "FP_REG_USED: %d, REQ_BUFF_SIZE: %d}.\n",
+                __func__, kernels.size(), receiveBuffer.size(),
+                intRegUsed, fpRegUsed, requestBuffer.size());
+            break;
+        }
+    }
+
+    // update firstGeneratorAvailableTime after making all changes.
+    for (int i = 0; i < requestGenRate; i++) {
+        generatorBusyUntil[i] = std::max(generatorBusyUntil[i], nextCycle());
+        firstGeneratorAvailableTime = std::min(
+                                            firstGeneratorAvailableTime,
+                                            generatorBusyUntil[i]
+                                            );
+    }
+
+    // now that we have simulated all the work of this cycle, we can
+    // apply the deltas to the h/w resources.
+    intRegUsed += int_used_now;
+    fpRegUsed += fp_used_now;
+
+    bool did_work = (requestBuffer.size() - req_buf_before) > 0;
+    if (did_work && (!nextSendEvent.pending())) {
+        scheduleNextSendEvent(nextCycle());
+    }
+
+    if (!nextGenEvent.pending()) {
+        scheduleNextGenEvent(firstGeneratorAvailableTime);
+    }
+}
+
+void
+SpatterGen::scheduleNextSendEvent(Tick when)
+{
+    // Skip when nothing is queued or the send event is already scheduled.
+    if (requestBuffer.empty() || nextSendEvent.scheduled()) {
+        return;
+    }
+    schedule(nextSendEvent, std::max(when, firstPortAvailableTime));
+    firstPortAvailableTime = MaxTick;
+}
+
+void
+SpatterGen::processNextSendEvent()
+{
+    int req_buf_before = requestBuffer.size();
+    for (int i = 0; i < sendRate; i++) {
+        if (portBusyUntil[i] > curTick()) {
+            DPRINTF(
+                SpatterGen,
+                "%s: Port[%d] is busy this cycle.\n", __func__, i
+            );
+            continue;
+        }
+        if (requestBuffer.empty()) {
+            DPRINTF(
+                SpatterGen,
+                "%s: No packets to send this cycle.\n", __func__
+            );
+            break;
+        }
+        if (!requestBuffer.hasReady(curTick())) {
+            DPRINTF(
+                SpatterGen,
+                "%s: Packet at front of requestBuffer not ready this cycle.\n",
+                __func__
+            );
+            break;
+        }
+        PacketPtr pkt = requestBuffer.front();
+        DPRINTF(
+            SpatterGen,
+            "%s: Sending pkt: %s to port[%d].\n",
+            __func__, pkt->print(), i
+        );
+        // NOTE: We assume the port will be busy for 1 cycle.
+        portBusyUntil[i] = clockEdge(Cycles(1));
+        port.sendPacket(pkt);
+        requestBuffer.pop();  // a rejected pkt is kept by the port instead
+        // the request counts as in flight until recvTimingResp() sees it
+        numPendingMemRequests++;
+        // record packet departure time
+        requestDepartureTime[pkt->req] = curTick();
+        // Now if we put the port in blocked state no point in continuing
+        // the loop. also no point in scheduling nextSendEvent.
+        if (port.blocked()) {
+            nextSendEvent.sleep();
+            break;
+        }
+    }
+    // update firstPortAvailableTime after making all changes.
+    for (int i = 0; i < sendRate; i++) {
+        // if the port was not used this cycle, it's busy until nextCycle().
+        portBusyUntil[i] = std::max(portBusyUntil[i], nextCycle());
+        firstPortAvailableTime = std::min(
+                                        firstPortAvailableTime,
+                                        portBusyUntil[i]
+                                        );
+    }
+
+    bool did_work = (req_buf_before - requestBuffer.size()) > 0;
+    if (did_work && nextGenEvent.pending()) {
+        // since this event might open up space for output of nextGenEvent,
+        // it should wake it up if nextGenEvent is asleep.
+        nextGenEvent.wake();
+        scheduleNextGenEvent(nextCycle());
+    }
+
+    if (!nextSendEvent.pending()) {
+        scheduleNextSendEvent(nextCycle());
+    }
+}
+
+SpatterGen::SpatterGenStats::SpatterGenStats(SpatterGen* spatter_gen):
+    statistics::Group(spatter_gen), spatterGen(spatter_gen),
+    ADD_STAT(numIndexReads, statistics::units::Count::get(),
+        "Number of reads from the indexer array."),
+    ADD_STAT(indexBytesRead, statistics::units::Byte::get(),
+        "Number of bytes read from the indexer array."),
+    ADD_STAT(totalIndexReadLatency, statistics::units::Tick::get(),
+        "Total latency for reading from the indexer array."),
+    ADD_STAT(numValueReads, statistics::units::Count::get(),
+        "Number of reads from the values array."),
+    ADD_STAT(numValueWrites, statistics::units::Count::get(),
+        "Number of writes to the values array."),
+    ADD_STAT(valueBytesRead, statistics::units::Byte::get(),
+        "Number of bytes read from the values array."),
+    ADD_STAT(valueBytesWritten, statistics::units::Byte::get(),
+        "Number of bytes written to the values array."),
+    ADD_STAT(totalValueReadLatency, statistics::units::Tick::get(),
+        "Total latency for reading from the values array."),
+    ADD_STAT(totalValueWriteLatency, statistics::units::Tick::get(),
+        "Total latency for writing to the values array."),
+    ADD_STAT(indexAccessLatency, statistics::units::Tick::get(),
+        "Distribution of latency for accessing the indexer array."),
+    ADD_STAT(valueAccessLatency, statistics::units::Tick::get(),
+        "Distribution of latency for accessing the values array."),
+    ADD_STAT(totalIndirectAccessLatency, statistics::units::Tick::get(),
+        "Distribution of total latency for indirect accesses.")
+{}  // the three latency distributions are init()-ed in regStats()
+
+void
+SpatterGen::SpatterGenStats::regStats()
+{
+    using namespace statistics;
+    indexAccessLatency.init(8);  // presumably 8 buckets -- TODO confirm
+    valueAccessLatency.init(16);
+    totalIndirectAccessLatency.init(16);
+}
+
+} // namespace gem5
diff --git a/src/cpu/testers/spatter_gen/spatter_gen.hh b/src/cpu/testers/spatter_gen/spatter_gen.hh
new file mode 100644
index 0000000000..1b8a8dbb61
--- /dev/null
+++ b/src/cpu/testers/spatter_gen/spatter_gen.hh
@@ -0,0 +1,252 @@
+/*
+* Copyright (c) 2024 The Regents of The University of California
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met: redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer;
+* redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution;
+* neither the name of the copyright holders nor the names of its
+* contributors may be used to endorse or promote products derived from
+* this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CPU_TESTERS_SPATTER_GEN_SPATTER_GEN_HH__
+#define __CPU_TESTERS_SPATTER_GEN_SPATTER_GEN_HH__
+
+#include <queue>
+#include <unordered_map>
+#include <vector>
+
+#include "base/statistics.hh"
+#include "base/stats/group.hh"
+#include "cpu/testers/spatter_gen/utility_structs.hh"
+#include "enums/SpatterKernelType.hh"
+#include "enums/SpatterProcessingMode.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+#include "params/SpatterGen.hh"
+#include "sim/clocked_object.hh"
+#include "sim/eventq.hh"
+
+namespace gem5
+{
+
+
+/**
+ * @class SpatterGen
+ * @brief Spatter Kernel Player
+ *
+ * This class takes Spatter JSON traces and plays them back in gem5.
+ * Each trace includes a list of Spatter kernels, which are played in order.
+ * Kernels are either of type scatter or gather.
+ * At the time of writing, kernels represent accesses to the memory with
+ * one level of indirection.
+ * Initially, an access is made to an array which we call index from now on.
+ * The index array is streamed through with load accesses.
+ * In a high level programming language this access will be similar to below.
+ * "for (int i = 0; i < n; i++) { idx = index[i]; }".
+ * The value at index[i] is then used to access another array which we will
+ * call value from now on.
+ * For scatter type kernels, a random value is stored in the location and
+ * for gather type kernels, the value is read from the location.
+ * In a high level programming language this access will be similar to below.
+ * Scatter
+ * "for (int i = 0; i < n; i++) { idx = index[i]; value[idx] = rand(); }".
+ * Gather
+ * "for (int i = 0; i < n; i++) { idx = index[i]; val = value[idx]; }".
+ * For more information you can take a look at
+ * https://github.com/hpcgarage/spatter/blob/main/README.md
+ * While the readme mentions MultiScatter and MultiGather kernels, the
+ * trace format is not finalized (at the time of writing).
+ */
+class SpatterGen: public ClockedObject
+{
+  private:
+    typedef enums::SpatterKernelType SpatterKernelType;
+    typedef enums::SpatterProcessingMode SpatterProcessingMode;
+
+    class SpatterGenEvent : public EventFunctionWrapper
+    {
+      private:
+        // TODO: split pending into pendingInput and pendingOutput
+        enum class SleepState
+        {
+            AWAKE,
+            ASLEEP
+        };
+
+        SleepState _state;
+
+      public:
+        SpatterGenEvent(const std::function<void(void)> &callback,
+                    const std::string &name):
+            EventFunctionWrapper(callback, name), _state(SleepState::AWAKE)
+        {}
+        // a SpatterGenEvent will only be asleep if it is pending output
+        bool pending() const { return _state == SleepState::ASLEEP; }
+        void sleep() { _state = SleepState::ASLEEP; }
+        void wake() { _state = SleepState::AWAKE; }
+    };
+
+    class SpatterGenPort: public RequestPort
+    {
+      private:
+        SpatterGen* owner;
+        PacketPtr blockedPacket;
+
+      public:
+        SpatterGenPort(SpatterGen* owner, const std::string& name):
+            RequestPort(name), owner(owner), blockedPacket(nullptr) {}
+
+        void sendPacket(PacketPtr pkt);
+        bool blocked() const { return blockedPacket != nullptr; }
+
+      protected:
+        virtual bool recvTimingResp(PacketPtr pkt) override;
+        virtual void recvReqRetry() override;
+    };
+
+    struct SpatterGenStats: public statistics::Group
+    {
+        SpatterGen* spatterGen;
+
+        // TODO: When we enable multiple levels of indirection, we should
+        // convert this to a vector with one stat for each level of index
+        statistics::Scalar numIndexReads;
+        // TODO: When we enable multiple levels of indirection, we should
+        // convert this to a vector with one stat for each level of index
+        statistics::Scalar indexBytesRead;
+        statistics::Scalar totalIndexReadLatency;
+
+        statistics::Scalar numValueReads;
+        statistics::Scalar numValueWrites;
+        statistics::Scalar valueBytesRead;
+        statistics::Scalar valueBytesWritten;
+        statistics::Scalar totalValueReadLatency;
+        statistics::Scalar totalValueWriteLatency;
+
+        // TODO: When we enable multiple levels of indirection, we should
+        // convert this to a vector with one stat for each level of index
+        statistics::Histogram indexAccessLatency;
+        statistics::Histogram valueAccessLatency;
+        statistics::Histogram totalIndirectAccessLatency;
+
+        virtual void regStats() override;
+
+        SpatterGenStats(SpatterGen* spatter_gen);
+    };
+
+    enum class SpatterGenState
+    {
+        // waiting for all other cores to get to WAITING state, no accesses
+        WAITING,
+        // only creating intermediate and ultimate accesses, i.e. wrapping up
+        DRAINING,
+        // creating all kinds of accesses, initial, intermediate, and ultimate
+        RUNNING
+    };
+
+    // non param related members
+    SpatterGenState state;
+    std::queue<SpatterKernel> kernels;
+    std::unordered_map<RequestPtr, Tick> requestDepartureTime;
+
+    RequestorID requestorId;
+    int numPendingMemRequests;
+
+    SpatterGenStats stats;
+
+    void checkForSimExit();
+
+    bool initAccessOk(int int_regs, int fp_regs, Tick when) const;
+    bool interAccessOk(int int_regs, int fp_regs, Tick when) const;
+    bool ultAccessOk(int int_regs, int fp_regs, Tick when) const;
+
+    // param related members (not necessarily one-to-one with params)
+    SpatterProcessingMode mode;
+    SpatterGenPort port;
+    // size of the register files,
+    // for every memory instruction we need to allocate one register.
+    int intRegFileSize;
+    int intRegUsed;
+    int fpRegFileSize;
+    int fpRegUsed;
+    // latency to generate a request
+    int requestGenLatency;
+    // number of requests generated per event
+    int requestGenRate;
+    // tracking smallest tick when at least one "AGU" is available;
+    Tick firstGeneratorAvailableTime;
+    // tracking the busy state of our so called "AGU"s.
+    std::vector<Tick> generatorBusyUntil;
+    SpatterGenEvent nextGenEvent;
+    void processNextGenEvent();
+    // put requests to the cache in the request buffer.
+    int requestBufferEntries;
+    // store request packet along with their insertion time into this queue.
+    TimedQueue<PacketPtr> requestBuffer;
+    // if nextGenEvent has to be scheduled at tick when, then schedule it.
+    // this function should only be called when nextGenEvent is not pending.
+    void scheduleNextGenEvent(Tick when);
+
+    // bandwidth to issue memory requests to cache,
+    // this is supposed to model the number of cache ports
+    // we will assume it takes 1 cycle to issue memory requests
+    int sendRate;
+    Tick firstPortAvailableTime;
+    std::vector<Tick> portBusyUntil;
+    SpatterGenEvent nextSendEvent;
+    void processNextSendEvent();
+    // if nextSendEvent has to be scheduled at tick when, then schedule it.
+    // this function should only be called when nextSendEvent is not pending.
+    void scheduleNextSendEvent(Tick when);
+
+    // put the memory responses here.
+    // no need to limit the size of this buffer.
+    // it's a response buffer and it will automatically
+    // be limited by requestBufferEntries, intRegFileSize, fpRegFileSize
+    TimedQueue<SpatterAccess*> receiveBuffer;
+
+  public:
+    PARAMS(SpatterGen);
+    SpatterGen(const Params& params);
+
+    Port&
+    getPort(const std::string& if_name, PortID idx = InvalidPortID) override;
+
+    virtual void startup() override;
+
+    void recvReqRetry();
+    bool recvTimingResp(PacketPtr pkt);
+
+    // PyBindMethod to interface adding a kernel with python JSON frontend.
+    void addKernel(
+        uint32_t id, uint32_t delta, uint32_t count,
+        SpatterKernelType type,
+        size_t index_size, Addr base_index_addr,
+        size_t value_size, Addr base_value_addr,
+        const std::vector<uint32_t>& indices
+    );
+
+    void proceedPastSyncPoint();
+};
+
+} // namespace gem5
+
+#endif // __CPU_TESTERS_SPATTER_GEN_SPATTER_GEN_HH__
diff --git a/src/cpu/testers/spatter_gen/utility_structs.hh b/src/cpu/testers/spatter_gen/utility_structs.hh
new file mode 100644
index 0000000000..21bff9e8ae
--- /dev/null
+++ b/src/cpu/testers/spatter_gen/utility_structs.hh
@@ -0,0 +1,242 @@
+/*
+* Copyright (c) 2024 The Regents of The University of California
+* All rights reserved.
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met: redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer;
+* redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution;
+* neither the name of the copyright holders nor the names of its
+* contributors may be used to endorse or promote products derived from
+* this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef __CPU_TESTERS_SPATTER_GEN_UTILITY_STRUCTS_HH__
+#define __CPU_TESTERS_SPATTER_GEN_UTILITY_STRUCTS_HH__
+
+#include <deque>
+#include <queue>
+
+#include "base/random.hh"
+#include "base/types.hh"
+#include "enums/SpatterKernelType.hh"
+#include "mem/packet.hh"
+
+namespace gem5
+{
+
+template<typename T>
+class TimedQueue
+{
+  private:
+    Tick latency;
+
+    std::queue<T> items;
+    std::queue<Tick> insertionTimes;
+
+  public:
+    TimedQueue(Tick latency): latency(latency) {}
+
+    void push(T item, Tick insertion_time)
+    {
+        items.push(item);
+        insertionTimes.push(insertion_time);
+    }
+
+    void pop()
+    {
+        items.pop();
+        insertionTimes.pop();
+    }
+
+    T front() const { return items.front(); }
+
+    bool empty() const { return items.empty(); }
+
+    size_t size() const { return items.size(); }
+
+    bool hasReady(Tick current_time) const
+    {
+        if (empty()) {
+            return false;
+        }
+        return (current_time - insertionTimes.front()) >= latency;
+    }
+};
+
+
+
+// Represents a single access to a SpatterKernel.
+// It supports multiple levels of indirection.
+// However, the SpatterKernel class only works with one level of
+// indirection (i.e. accessing value[index[i]]).
+struct SpatterAccess : public Packet::SenderState
+{
+    typedef std::tuple<Addr, size_t> AccessPair;
+    typedef enums::SpatterKernelType SpatterKernelType;
+
+    RequestorID requestorId;
+    SpatterKernelType _kernelType;
+    Tick accTripTime;
+    std::queue<AccessPair> accessPairs;
+
+    SpatterAccess(
+        RequestorID requestor_id,
+        SpatterKernelType kernel_type,
+        const std::queue<AccessPair>& access_pairs
+    ):
+        requestorId(requestor_id), _kernelType(kernel_type),
+        accTripTime(0), accessPairs(access_pairs)
+    {}
+
+    SpatterKernelType type() const { return _kernelType; }
+
+    int tripsLeft() const { return accessPairs.size(); }
+
+    void recordTripTime(Tick trip_time) { accTripTime += trip_time; }
+
+    Tick tripTimeSoFar() const { return accTripTime; }
+
+    AccessPair nextAccessPair()
+    {
+        assert(tripsLeft() > 0);
+        AccessPair access_pair = accessPairs.front();
+        accessPairs.pop();
+        return access_pair;
+    }
+
+    PacketPtr nextPacket()
+    {
+        Addr addr;
+        size_t size;
+        std::tie(addr, size) = nextAccessPair();
+        MemCmd cmd;
+        if (tripsLeft() >= 1){
+            cmd = MemCmd::ReadReq;
+        } else {
+            cmd = _kernelType == \
+                SpatterKernelType::gather ? MemCmd::ReadReq : MemCmd::WriteReq;
+        }
+        return createPacket(addr, size, cmd);
+    }
+
+    PacketPtr createPacket(Addr addr, size_t size, MemCmd cmd) const
+    {
+        RequestPtr req = std::make_shared<Request>(addr, size, 0, requestorId);
+
+        // Dummy PC to have PC-based prefetchers latch on;
+        // get entropy into higher bits
+        // This piece of code is directly copied from
+        // gem5::TrafficGen::
+        req->setPC(((Addr) requestorId) << 2);
+        PacketPtr pkt = new Packet(req, cmd);
+        uint8_t* pkt_data = new uint8_t[req->getSize()];
+        // Randomly initialize pkt_data, for testing cache coherence.
+        for (int i = 0; i < req->getSize(); i++) {
+            pkt_data[i] = random_mt.random<uint8_t>();
+        }
+        pkt->dataDynamic(pkt_data);
+        return pkt;
+    }
+};
+
+class SpatterKernel
+{
+  private:
+    typedef enums::SpatterKernelType SpatterKernelType;
+    typedef SpatterAccess::AccessPair AccessPair;
+
+    RequestorID requestorId;
+    uint32_t _id;
+    uint32_t delta;
+    uint32_t count;
+
+    SpatterKernelType _type;
+
+    size_t indexSize;
+    Addr baseIndexAddr;
+
+    size_t valueSize;
+    Addr baseValueAddr;
+
+    // needed to iterate over indices multiple times.
+    uint32_t index;
+    // current iteration over indices
+    uint32_t iteration;
+
+    // number of times we have left to roll indices to finish one iteration.
+    uint32_t remRolls;
+    std::deque<uint32_t> indices;
+
+  public:
+
+    SpatterKernel(
+        RequestorID requestor_id,
+        uint32_t id, uint32_t delta, uint32_t count,
+        SpatterKernelType type,
+        size_t index_size, Addr base_index_addr,
+        size_t value_size, Addr base_value_addr
+    ):
+        requestorId(requestor_id),
+        _id(id), delta(delta), count(count),
+        _type(type),
+        indexSize(index_size), baseIndexAddr(base_index_addr),
+        valueSize(value_size), baseValueAddr(base_value_addr),
+        index(0), iteration(0), remRolls(0)
+    {}
+
+    uint32_t id() const { return _id; }
+
+    void setIndices(const std::vector<uint32_t>& pattern)
+    {
+        indices.assign(pattern.begin(), pattern.end());
+        remRolls = indices.size();
+    }
+
+    SpatterKernelType type() const { return _type; }
+
+    bool done() const { return iteration == count; }
+
+    SpatterAccess* nextSpatterAccess()
+    {
+        std::queue<AccessPair> access_pairs;
+        Addr index_addr = baseIndexAddr + (index * indexSize);
+        access_pairs.emplace(index_addr, indexSize);
+        // update index in the index array
+        index++;
+
+        uint32_t front = indices.front();
+        uint32_t value_index = (delta * iteration) + front;
+        Addr value_addr = baseValueAddr + (value_index * valueSize);
+        access_pairs.emplace(value_addr, valueSize);
+        // roll indices
+        indices.pop_front();
+        indices.push_back(front);
+        remRolls--;
+        if (remRolls == 0) {
+            remRolls = indices.size();
+            iteration++;
+        }
+
+        return new SpatterAccess(requestorId, _type, access_pairs);
+    }
+};
+
+} // namespace gem5
+
+#endif // __CPU_TESTERS_SPATTER_GEN_UTILITY_STRUCTS_HH__

From d661023de48488463f4c84ea88d81f7b097c62a0 Mon Sep 17 00:00:00 2001
From: Mahyar Samani <msamani@ucdavis.edu>
Date: Tue, 28 May 2024 00:06:41 -0700
Subject: [PATCH 2/2] stdlib: Adding SpatterGenCore and SpatterGen

This change adds code for SpatterGenCore and SpatterGen as well
as SpatterKernel to the standard library. SpatterGenCore and
SpatterGen follow the same structure as AbstractCore and
AbstractProcessor. spatter_kernel.py adds utility functions
to parse dictionaries as well as partition a list into
multiple lists through interleaving to be used when setting up
a multicore SpatterGen.

Change-Id: I003553e97f901c0724f5feac0bb6e21a020bd6ad
---
 src/python/SConscript                         |   8 +
 .../processors/spatter_gen/__init__.py        |  33 +++
 .../spatter_gen/spatter_generator.py          | 147 +++++++++++++
 .../spatter_gen/spatter_generator_core.py     |  73 +++++++
 .../processors/spatter_gen/spatter_kernel.py  | 200 ++++++++++++++++++
 src/python/gem5/simulate/exit_event.py        |   3 +
 .../gem5/simulate/exit_event_generators.py    |   7 +
 src/python/gem5/simulate/simulator.py         |  15 +-
 8 files changed, 483 insertions(+), 3 deletions(-)
 create mode 100644 src/python/gem5/components/processors/spatter_gen/__init__.py
 create mode 100644 src/python/gem5/components/processors/spatter_gen/spatter_generator.py
 create mode 100644 src/python/gem5/components/processors/spatter_gen/spatter_generator_core.py
 create mode 100644 src/python/gem5/components/processors/spatter_gen/spatter_kernel.py

diff --git a/src/python/SConscript b/src/python/SConscript
index fc2241fa09..af117e4a14 100644
--- a/src/python/SConscript
+++ b/src/python/SConscript
@@ -252,6 +252,14 @@ PySource('gem5.components.processors',
     'gem5/components/processors/random_generator_core.py')
 PySource('gem5.components.processors',
     'gem5/components/processors/random_generator.py')
+PySource('gem5.components.processors.spatter_gen',
+         'gem5/components/processors/spatter_gen/__init__.py')
+PySource('gem5.components.processors.spatter_gen',
+    'gem5/components/processors/spatter_gen/spatter_generator_core.py')
+PySource('gem5.components.processors.spatter_gen',
+    'gem5/components/processors/spatter_gen/spatter_generator.py')
+PySource('gem5.components.processors.spatter_gen',
+    'gem5/components/processors/spatter_gen/spatter_kernel.py')
 PySource('gem5.components.processors',
     'gem5/components/processors/simple_core.py')
 PySource('gem5.components.processors',
diff --git a/src/python/gem5/components/processors/spatter_gen/__init__.py b/src/python/gem5/components/processors/spatter_gen/__init__.py
new file mode 100644
index 0000000000..3c1847b914
--- /dev/null
+++ b/src/python/gem5/components/processors/spatter_gen/__init__.py
@@ -0,0 +1,33 @@
+# Copyright (c) 2024 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+from .spatter_generator import SpatterGenerator
+from .spatter_kernel import (
+    SpatterKernel,
+    parse_kernel,
+    partition_trace,
+)
diff --git a/src/python/gem5/components/processors/spatter_gen/spatter_generator.py b/src/python/gem5/components/processors/spatter_gen/spatter_generator.py
new file mode 100644
index 0000000000..72939f82dc
--- /dev/null
+++ b/src/python/gem5/components/processors/spatter_gen/spatter_generator.py
@@ -0,0 +1,147 @@
+# Copyright (c) 2024 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import (
+    List,
+    Optional,
+    Union,
+)
+
+from m5.objects import (
+    SpatterProcessingMode,
+    SrcClockDomain,
+    VoltageDomain,
+)
+from m5.stats import dump as dump_stats
+from m5.stats import reset as reset_stats
+from m5.util import fatal
+
+from ....utils.override import overrides
+from ..abstract_generator import AbstractGenerator
+from .spatter_generator_core import SpatterGeneratorCore
+from .spatter_kernel import SpatterKernel
+
+
+class SpatterGenerator(AbstractGenerator):
+    def __init__(
+        self,
+        num_cores: int = 1,
+        processing_mode: Union[SpatterProcessingMode, str] = "synchronous",
+        int_regfile_size: int = 384,
+        fp_regfile_size: int = 224,
+        request_gen_latency: int = 2,
+        request_gen_rate: int = 4,
+        request_buffer_entries: int = 32,
+        send_rate: int = 2,
+        clk_freq: Optional[str] = None,
+    ) -> None:
+        super().__init__(
+            cores=self._create_cores(
+                num_cores,
+                processing_mode,
+                int_regfile_size,
+                fp_regfile_size,
+                request_gen_latency,
+                request_gen_rate,
+                request_buffer_entries,
+                send_rate,
+            )
+        )
+        # No else branch is needed: without clk_freq, clk_domain is inherited
+        # from the closest ancestor in the SimObject tree.
+        if clk_freq is not None:
+            clock_domain = SrcClockDomain(
+                clock=clk_freq, voltage_domain=VoltageDomain()
+            )
+            for generator in self.cores:
+                generator.clk_domain = clock_domain
+
+        self._num_kernels = 0
+        self._sync = str(processing_mode) == "synchronous"  # str or enum value
+
+    def _create_cores(
+        self,
+        num_cores: int,
+        processing_mode: Union[SpatterProcessingMode, str],
+        int_regfile_size: int,
+        fp_regfile_size: int,
+        request_gen_latency: int,
+        request_gen_rate: int,
+        request_buffer_entries: int,
+        send_rate: int,
+    ) -> List[SpatterGeneratorCore]:
+        return [
+            SpatterGeneratorCore(
+                processing_mode,
+                int_regfile_size,
+                fp_regfile_size,
+                request_gen_latency,
+                request_gen_rate,
+                request_buffer_entries,
+                send_rate,
+            )
+            for _ in range(num_cores)
+        ]
+
+    def add_kernel(self, kernels: List[SpatterKernel]) -> None:
+        assert len(kernels) == len(self.cores)
+        for core, kernel in zip(self.cores, kernels):
+            if kernel.empty():
+                fatal(
+                    f"Cannot add {kernel} since it's empty. "
+                    "At the moment SpatterGenerator (or gem5::SpatterGen) "
+                    "does not support adding empty kernels to cores. As a "
+                    "temporary fix you can try adding 1 dummy element to the "
+                    "trace. You can also set fix_empty_trace to True in the "
+                    "constructor of the SpatterKernel which automatically "
+                    "inserts a dummy element (0) to the trace."
+                )
+            core.add_kernel(kernel)
+        self._num_kernels += 1
+
+    @overrides(AbstractGenerator)
+    def start_traffic(self) -> None:
+        for core in self.cores:
+            core.start_traffic()
+
+    def _proceed_past_sync_point(self) -> None:
+        if not self._sync:
+            return
+        for core in self.cores:
+            core.generator.proceedPastSyncPoint()
+
+    def handle_spatter_exit(self):
+        spatter_exits_observed = 0
+        sync_points_observed = 0
+        sync_points_expected = self._num_kernels if self._sync else 1
+        while True:
+            spatter_exits_observed += 1
+            if spatter_exits_observed % len(self.cores) == 0:
+                sync_points_observed += 1
+                dump_stats()
+                reset_stats()
+                self._proceed_past_sync_point()
+            yield not (sync_points_observed < sync_points_expected)
diff --git a/src/python/gem5/components/processors/spatter_gen/spatter_generator_core.py b/src/python/gem5/components/processors/spatter_gen/spatter_generator_core.py
new file mode 100644
index 0000000000..50799eae84
--- /dev/null
+++ b/src/python/gem5/components/processors/spatter_gen/spatter_generator_core.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2024 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from typing import Union
+
+from m5.objects import (
+    Port,
+    SpatterGen,
+    SpatterProcessingMode,
+)
+
+from ....utils.override import overrides
+from ..abstract_core import AbstractCore
+from ..abstract_generator_core import AbstractGeneratorCore
+from .spatter_kernel import SpatterKernel
+
+
+class SpatterGeneratorCore(AbstractGeneratorCore):
+    def __init__(
+        self,
+        processing_mode: Union[SpatterProcessingMode, str],
+        int_regfile_size: int,
+        fp_regfile_size: int,
+        request_gen_latency: int,
+        request_gen_rate: int,
+        request_buffer_entries: int,
+        send_rate: int,
+    ):
+        super().__init__()
+        self.generator = SpatterGen(
+            processing_mode=processing_mode,
+            int_regfile_size=int_regfile_size,
+            fp_regfile_size=fp_regfile_size,
+            request_gen_latency=request_gen_latency,
+            request_gen_rate=request_gen_rate,
+            request_buffer_entries=request_buffer_entries,
+            send_rate=send_rate,
+        )
+        self._kernels = []
+
+    @overrides(AbstractCore)
+    def connect_dcache(self, port: Port) -> None:
+        self.generator.port = port
+
+    def add_kernel(self, kernel: SpatterKernel) -> None:
+        self._kernels.append(kernel)
+
+    def start_traffic(self) -> None:
+        for kernel in self._kernels:
+            self.generator.addKernel(*kernel.cxx_call_args())
diff --git a/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py b/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py
new file mode 100644
index 0000000000..4cf0ee814a
--- /dev/null
+++ b/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py
@@ -0,0 +1,200 @@
+# Copyright (c) 2024 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from math import ceil
+from typing import (
+    List,
+    Tuple,
+)
+
+from m5.objects import SpatterKernelType
+from m5.params import Addr
+from m5.util import inform
+
+
+def parse_kernel(kernel: dict, default_delta=8) -> Tuple[int, int, str, List]:
+    """Return (delta, count, SpatterKernelType, pattern) of a trace kernel."""
+    delta = kernel.get("delta", default_delta)
+    if delta < 0:
+        inform(
+            f"Negative delta found: {delta}. Setting it to {default_delta}."
+        )
+        delta = default_delta
+    kernel_type = kernel.get("kernel", None)  # required; no default
+    if kernel_type is None:
+        raise ValueError("Keyword 'kernel' not found.")
+    kernel_type = SpatterKernelType(kernel_type.lower())
+    trace = kernel.get("pattern", [])
+    if not trace:  # a missing, empty or null pattern is rejected
+        raise ValueError("Empty 'pattern' found.")
+    return (delta, kernel.get("count", 1), kernel_type, trace)  # count: 1
+
+
+def partition_trace(original_trace, num_partitions, interleave_size):
+    """Deal `interleave_size`-element chunks round-robin to partitions."""
+    buckets = [[] for _ in range(num_partitions)]
+    total = len(original_trace)
+    for chunk_id in range(ceil(total / interleave_size)):
+        start = chunk_id * interleave_size
+        # The final chunk is clamped to the end of the trace.
+        stop = min(start + interleave_size, total)
+        buckets[chunk_id % num_partitions] += original_trace[start:stop]
+    return buckets
+
+
+class SpatterKernel:
+    """This class encapsulates one kernel in a spatter trace.
+        A spatter trace is represented as a JSON file.
+        An example of a spatter trace can be found here:
+    https://github.com/hpcgarage/spatter/blob/main/standard-suite/app-traces/amg.json
+        Each trace may have multiple kernels.
+        Each kernel represents a code execution like below
+            for (int iteration = 0; iteration < count; iteration++)
+            {
+                for (int i = 0; i < N; i++) {
+                    value[index[i] + iteration * delta] = rand(); // kernel: scatter
+                    // OR
+                    sum += value[index[i] + iteration * delta]; // kernel: gather
+                }
+            }
+        Where `delta` and `count` are fields in each kernel.
+        `kernel` is another field that determines whether the accesses to value
+        are loads or stores.
+        The field `pattern` stores the index array.
+
+        This file provides two utility functions to parse spatter traces:
+            parse_kernel: takes a dictionary and returns a tuple of
+            delta, count, type, and trace.
+            partition_trace: takes the original trace, number of partitions,
+            and interleave_size.
+            It returns a list of `num_partitions` partitions where each partition
+            is a list of interleaved elements from `original_trace`.
+            The elements in the `original_trace` are interleaved with a
+            granularity of `interleave_size`.
+        The code snippet below shows how to use these functions to create kernels.
+            generator = SpatterGenerator(num_cores)
+
+            with open(trace_path, "r") as trace_file:
+                kernels = json.load(trace_file)
+
+            for i, kernel in enumerate(kernels):
+                delta, count, type, og_trace = parse_kernel(kernel)
+                traces = partition_trace(og_trace, num_cores, 128)
+                core_kernels = [SpatterKernel(
+                                        kernel_id=i,
+                                        kernel_delta=delta,
+                                        kernel_count=count,
+                                        kernel_type=type,
+                                        kernel_trace=trace,
+                                        index_size=4,
+                                        base_index_addr=0,
+                                        value_size=8,
+                                        base_value_addr=0x400000000
+                                        )
+                            for trace in traces
+                            ]
+                generator.add_kernel(core_kernels)
+
+        Args:
+            kernel_id (int): The ID of the kernel.
+            User defined, i.e. spatter traces don't have this field.
+            It's used to identify the kernel in the simulation.
+            kernel_delta (int): `delta` from the spatter trace.
+            kernel_count (int): `count` from the spatter trace.
+            kernel_type (SpatterKernelType): The type of the kernel.
+            `kernel` from spatter trace.
+            kernel_trace (List[int]): The elements of the `index` array.
+            `pattern` from spatter trace.
+            index_size (int): The size of elements in `index`.
+            User defined, i.e. spatter traces don't have this field.
+            It represents the size of elements in the `index` array in code above.
+            base_index_addr (Addr): The base address of the index.
+            User defined, i.e. spatter traces don't have this field.
+            It represents the pointer to the `index` array in the code above.
+            value_size (int): The size of elements in `value`.
+            User defined, i.e. spatter traces don't have this field.
+            It represents the size of elements in the `value` array in code above.
+            base_value_addr (Addr): The base address of the value.
+            User defined, i.e. spatter traces don't have this field.
+            It represents the pointer to the `value` array in the code above.
+            fix_empty_trace (bool): If True and `kernel_trace` is empty, the
+            trace becomes [0] with delta 0 and count 1. Defaults to False.
+    """
+
+    def __init__(
+        self,
+        kernel_id: int,
+        kernel_delta: int,
+        kernel_count: int,
+        kernel_type: SpatterKernelType,
+        kernel_trace: List[int],
+        index_size: int,
+        base_index_addr: Addr,
+        value_size: int,
+        base_value_addr: Addr,
+        fix_empty_trace: bool = False,
+    ):
+        self._id = kernel_id
+        self._delta = kernel_delta
+        self._count = kernel_count
+        self._trace = kernel_trace
+        self._type = kernel_type
+        self._index_size = index_size
+        self._base_index_addr = base_index_addr
+        self._value_size = value_size
+        self._base_value_addr = base_value_addr
+
+        if fix_empty_trace and len(kernel_trace) == 0:
+            inform(
+                "Empty trace found. Fixing it by adding a dummy element. "
+                "Also setting delta to 0 and count to 1.",
+            )
+            self._trace = [0]  # overrides the values stored above
+            self._delta = 0
+            self._count = 1
+
+    def empty(self) -> bool:  # True when the kernel holds no indices
+        return len(self._trace) == 0
+
+    def cxx_call_args(self) -> list:  # unpacked positionally into addKernel
+        return [
+            self._id,
+            self._delta,
+            self._count,
+            self._type.getValue(),
+            self._index_size,
+            self._base_index_addr,
+            self._value_size,
+            self._base_value_addr,
+            self._trace,
+        ]
+
+    def __str__(self) -> str:  # only the first 8 indices are printed
+        return (
+            f"SpatterKernel(id={self._id}, delta={self._delta}, "
+            f"count={self._count}, type={self._type}, "
+            f"trace[:8]={self._trace[:8]})"
+        )
diff --git a/src/python/gem5/simulate/exit_event.py b/src/python/gem5/simulate/exit_event.py
index b902643a3f..5a0bb3d1d7 100644
--- a/src/python/gem5/simulate/exit_event.py
+++ b/src/python/gem5/simulate/exit_event.py
@@ -39,6 +39,7 @@ class ExitEvent(Enum):
     EXIT = "exit"  # A standard vanilla exit.
     WORKBEGIN = "workbegin"  # An exit because a ROI has been reached.
     WORKEND = "workend"  # An exit because a ROI has ended.
+    SPATTER_EXIT = "spatter exit"  # An exit because a spatter core has ended.
     SWITCHCPU = "switchcpu"  # An exit needed to switch CPU cores.
     FAIL = "fail"  # An exit because the simulation has failed.
     CHECKPOINT = "checkpoint"  # An exit to load a checkpoint.
@@ -115,6 +116,8 @@ class ExitEvent(Enum):
         elif exit_string.endswith("is finished updating the memory.\n"):
             # This is for the gups generator exit event
             return ExitEvent.EXIT
+        elif exit_string.endswith("received all expected responses."):
+            return ExitEvent.SPATTER_EXIT
         raise NotImplementedError(
             f"Exit event '{exit_string}' not implemented"
         )
diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py
index 4d18b4cee0..b237b064e2 100644
--- a/src/python/gem5/simulate/exit_event_generators.py
+++ b/src/python/gem5/simulate/exit_event_generators.py
@@ -36,6 +36,7 @@ from m5.util import warn
 from gem5.resources.looppoint import Looppoint
 
 from ..components.processors.abstract_processor import AbstractProcessor
+from ..components.processors.spatter_gen import SpatterGenerator
 from ..components.processors.switchable_processor import SwitchableProcessor
 from ..resources.resource import SimpointResource
 
@@ -221,3 +222,9 @@ def looppoint_save_checkpoint_generator(
         yield False
 
     yield True
+
+
+def spatter_exit_generator(spatter_gen: SpatterGenerator):
+    assert isinstance(spatter_gen, SpatterGenerator)  # runs on first next()
+    while True:  # re-enter the processor's handler whenever it is exhausted
+        yield from spatter_gen.handle_spatter_exit()
diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py
index 5a5cf9af89..66f67d6ffb 100644
--- a/src/python/gem5/simulate/simulator.py
+++ b/src/python/gem5/simulate/simulator.py
@@ -53,6 +53,7 @@ from .exit_event_generators import (
     reset_stats_generator,
     save_checkpoint_generator,
     skip_generator,
+    spatter_exit_generator,
     switch_generator,
     warn_default_decorator,
 )
@@ -281,6 +282,12 @@ class Simulator:
                 "creating a checkpoint and continuing",
             )(),
             ExitEvent.FAIL: exit_generator(),
+            ExitEvent.SPATTER_EXIT: warn_default_decorator(
+                spatter_exit_generator,
+                "spatter exit",
+                "dumping and resetting stats after each sync point. "
+                "Note that there will be num_cores*sync_points spatter_exits.",
+            )(spatter_gen=board.get_processor()),
             ExitEvent.SWITCHCPU: warn_default_decorator(
                 switch_generator,
                 "switch CPU",
@@ -518,9 +525,11 @@ class Simulator:
             self._board._pre_instantiate()
 
             root = Root(
-                full_system=self._full_system
-                if self._full_system is not None
-                else self._board.is_fullsystem(),
+                full_system=(
+                    self._full_system
+                    if self._full_system is not None
+                    else self._board.is_fullsystem()
+                ),
                 board=self._board,
             )