diff --git a/src/cpu/testers/spatter_gen/SConscript b/src/cpu/testers/spatter_gen/SConscript new file mode 100644 index 0000000000..86231409dd --- /dev/null +++ b/src/cpu/testers/spatter_gen/SConscript @@ -0,0 +1,38 @@ +# Copyright (c) 2024 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +Import("*") + +SimObject( + "SpatterGen.py", + sim_objects=["SpatterGen"], + enums=["SpatterKernelType", "SpatterProcessingMode"], +) + +Source("spatter_gen.cc") + +DebugFlag("SpatterGen") +DebugFlag("SpatterKernel") diff --git a/src/cpu/testers/spatter_gen/SpatterGen.py b/src/cpu/testers/spatter_gen/SpatterGen.py new file mode 100644 index 0000000000..1c88f867ca --- /dev/null +++ b/src/cpu/testers/spatter_gen/SpatterGen.py @@ -0,0 +1,115 @@ +# Copyright (c) 2024 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.citations import add_citation +from m5.objects.ClockedObject import ClockedObject +from m5.params import * +from m5.proxy import * +from m5.util.pybind import PyBindMethod + + +class SpatterKernelType(Enum): + vals = ["scatter", "gather"] + + +class SpatterProcessingMode(Enum): + vals = ["synchronous", "asynchronous"] + + +class SpatterGen(ClockedObject): + type = "SpatterGen" + cxx_header = "cpu/testers/spatter_gen/spatter_gen.hh" + cxx_class = "gem5::SpatterGen" + + system = Param.System(Parent.any, "System this SpatterGen is a part of.") + + processing_mode = Param.SpatterProcessingMode( + "How to process kernels accross multiple SpatterGen cores. " + "Whether to synchronize on kernel boundaries or not." + ) + + port = RequestPort("Port to send memory requests.") + + int_regfile_size = Param.Int("Size of the integer register file.") + fp_regfile_size = Param.Int("Size of the floating point register file.") + request_gen_latency = Param.Cycles( + "Number of cycles to spend for creating a request." + ) + request_gen_rate = Param.Int("Number of requests generate per cycle.") + request_buffer_entries = Param.Int("Size of the request buffer.") + send_rate = Param.Int( + "Number of requests to send in parallel." + "Emulates the number of dcache ports." 
+ ) + + cxx_exports = [ + PyBindMethod("addKernel"), + PyBindMethod("proceedPastSyncPoint"), + ] + + +add_citation( + SpatterGen, + """@inproceedings{10.1145/3422575.3422794, +author = {Lavin, Patrick and Young, Jeffrey and Vuduc, Richard and Riedy, +Jason and Vose, Aaron and Ernst, Daniel}, +title = {Evaluating Gather and Scatter Performance on CPUs and GPUs}, +year = {2021}, +isbn = {9781450388993}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3422575.3422794}, +doi = {10.1145/3422575.3422794}, +abstract = {This paper describes a new benchmark tool, +Spatter, for assessing memory system architectures in the context of a +specific category of indexed accesses known as gather and scatter. +These types of operations are increasingly used to express sparse and +irregular data access patterns, and they have widespread utility in many +modern HPC applications including scientific simulations, data mining and +analysis computations, and graph processing. However, many traditional +benchmarking tools like STREAM, STRIDE, and GUPS focus on characterizing +only uniform stride or fully random accesses despite evidence that modern +applications use varied sets of more complex access patterns. Spatter is an +open-source benchmark that provides a tunable and configurable framework to +benchmark a variety of indexed access patterns, including variations of gather +/ scatter that are seen in HPC mini-apps evaluated in this work. The design of +Spatter includes backends for OpenMP and CUDA, and experiments show how it can +be used to evaluate 1) uniform access patterns for CPU and GPU, 2) prefetching +regimes for gather / scatter, 3) compiler implementations of vectorization for +gather / scatter, and 4) trace-driven “proxy patterns” that reflect the +patterns found in multiple applications. 
The results from Spatter experiments +show, for instance, that GPUs typically outperform CPUs for these operations +in absolute bandwidth but not fraction of peak bandwidth, and that Spatter can +better represent the performance of some cache-dependent mini-apps than +traditional STREAM bandwidth measurements.}, +booktitle = {Proceedings of the International Symposium on Memory Systems}, +pages = {209–222}, +numpages = {14}, +location = {Washington, DC, USA}, +series = {MEMSYS '20} +} +""", +) diff --git a/src/cpu/testers/spatter_gen/spatter_gen.cc b/src/cpu/testers/spatter_gen/spatter_gen.cc new file mode 100644 index 0000000000..b57259911b --- /dev/null +++ b/src/cpu/testers/spatter_gen/spatter_gen.cc @@ -0,0 +1,582 @@ +/* +* Copyright (c) 2024 The Regents of The University of California +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "cpu/testers/spatter_gen/spatter_gen.hh" + +#include "base/cprintf.hh" +#include "debug/SpatterGen.hh" +#include "debug/SpatterKernel.hh" +#include "enums/SpatterKernelType.hh" +#include "enums/SpatterProcessingMode.hh" +#include "mem/packet.hh" +#include "sim/sim_exit.hh" +#include "sim/system.hh" + +namespace gem5 +{ + +using enums::SpatterKernelTypeStrings; +using enums::SpatterProcessingMode; + +SpatterGen::SpatterGen(const Params& params): + ClockedObject(params), + state(SpatterGenState::RUNNING), + requestorId(params.system->getRequestorId(this)), + numPendingMemRequests(0), + stats(this), + mode(params.processing_mode), + port(this, name() + ".port"), + intRegFileSize(params.int_regfile_size), intRegUsed(0), + fpRegFileSize(params.fp_regfile_size), fpRegUsed(0), + requestGenLatency(params.request_gen_latency), + requestGenRate(params.request_gen_rate), + firstGeneratorAvailableTime(0), + nextGenEvent([this](){ processNextGenEvent(); }, name() + ".GenEvent"), + requestBufferEntries(params.request_buffer_entries), + requestBuffer(clockPeriod()), + sendRate(params.send_rate), + firstPortAvailableTime(0), + nextSendEvent([this](){ processNextSendEvent(); }, name() + ".SendEvent"), + receiveBuffer(clockPeriod()) +{ + fatal_if(fpRegFileSize < requestBufferEntries, + "fp_regfile_size should be >= request_buffer_entries. " 
+ "If request_buffer_entries is bigger than fp_regfile_size, " + "it may result in inaccuracies in your simulation. " + "Ideally: fp_regfile_size >> request_buffer_entries." + ); + generatorBusyUntil.resize(requestGenRate, 0); + portBusyUntil.resize(sendRate, 0); +} + +Port& +SpatterGen::getPort(const std::string& if_name, PortID idx) +{ + if (if_name == "port") { + return port; + } else { + return ClockedObject::getPort(if_name, idx); + } +} + +void +SpatterGen::startup() +{ + scheduleNextGenEvent(curTick()); +} + +void +SpatterGen::SpatterGenPort::sendPacket(PacketPtr pkt) +{ + panic_if(blocked(), "Should never try to send if port is blocked."); + if (!sendTimingReq(pkt)) { + blockedPacket = pkt; + DPRINTF( + SpatterGen, + "%s: Port blocked when sending %s.\n", + __func__, pkt->print() + ); + } +} + +void +SpatterGen::SpatterGenPort::recvReqRetry() +{ + DPRINTF(SpatterGen, "%s: Port received a ReqRetry.\n", __func__); + panic_if( + blockedPacket == nullptr, + "Received reqRetry with no blocked packet." + ); + if (!sendTimingReq(blockedPacket)) { + DPRINTF( + SpatterGen, + "%s: Port blocked when sending %s.\n", + __func__, blockedPacket->print() + ); + } else { + blockedPacket = nullptr; + owner->recvReqRetry(); + } +} + +void +SpatterGen::recvReqRetry() +{ + if (nextSendEvent.pending()) { + nextSendEvent.wake(); + scheduleNextSendEvent(nextCycle()); + } +} + +bool +SpatterGen::SpatterGenPort::recvTimingResp(PacketPtr pkt) { + return owner->recvTimingResp(pkt); +} + +bool +SpatterGen::recvTimingResp(PacketPtr pkt) +{ + DPRINTF(SpatterGen, "%s: Received pkt: %s.\n", __func__, pkt->print()); + assert(pkt->isResponse()); + + // record trip time. 
+ SpatterAccess* spatter_access = pkt->findNextSenderState<SpatterAccess>(); + Tick trip_time = (curTick() - requestDepartureTime[pkt->req]); + requestDepartureTime.erase(pkt->req); + spatter_access->recordTripTime(trip_time); + + int trips_left = spatter_access->tripsLeft(); + assert(trips_left >= 0); + if (trips_left > 0) { + stats.numIndexReads++; + stats.indexBytesRead += pkt->getSize(); + stats.totalIndexReadLatency += trip_time; + + stats.indexAccessLatency.sample(trip_time); + receiveBuffer.push(spatter_access, curTick()); + } else { + stats.valueAccessLatency.sample(trip_time); + stats.totalIndirectAccessLatency.sample( + spatter_access->tripTimeSoFar() + ); + if (spatter_access->type() == SpatterKernelType::gather) { + stats.numValueReads++; + stats.valueBytesRead += pkt->getSize(); + stats.totalValueReadLatency += trip_time; + } else if (spatter_access->type() == SpatterKernelType::scatter) { + stats.numValueWrites++; + stats.valueBytesWritten += pkt->getSize(); + stats.totalValueWriteLatency += trip_time; + } else { + panic("Unknown kernel type."); + } + // CAUTION: We're going to decrement fpRegUsed here, + // it could cause inaccuracies if processNextGenEvent + // is called after recvTimingResp on the same tick. + // i.e. we might end up releasing a register on the same + // cycle that we are allocating it. + // it's probably not going to ever be an issue since + // fpRegFileSize is probably >> requestBufferEntries + // i.e. the chances of running out of fp registers is low because + // we do not simulate parts of the pipeline that back things up into + // fp registers, e.g. functional units of ALU. + fpRegUsed--; + delete spatter_access; + } + + // delete the pkt since we don't need it anymore. 
+ delete pkt; + + if (!nextGenEvent.pending()) { + scheduleNextGenEvent(nextCycle()); + } + + numPendingMemRequests--; + checkForSimExit(); + return true; +} + +void +SpatterGen::addKernel( + uint32_t id, uint32_t delta, uint32_t count, + SpatterKernelType type, + size_t index_size, Addr base_index_addr, + size_t value_size, Addr base_value_addr, + const std::vector<uint32_t>& indices +) +{ + DPRINTF( + SpatterGen, + "%s: Adding kernel with id: %d, delta: %d, count: %d, type: %s.\n", + __func__, id, delta, count, SpatterKernelTypeStrings[type] + ); + SpatterKernel new_kernel( + requestorId, + id, delta, count, type, + index_size, base_index_addr, + value_size, base_value_addr + ); + new_kernel.setIndices(indices); + kernels.push(new_kernel); +} + +void +SpatterGen::proceedPastSyncPoint() +{ + assert(mode == SpatterProcessingMode::synchronous); + assert(state == SpatterGenState::WAITING); + state = SpatterGenState::RUNNING; + scheduleNextGenEvent(nextCycle()); +} + +void +SpatterGen::checkForSimExit() +{ + bool no_pending = numPendingMemRequests == 0; + bool no_queued = requestBuffer.empty(); + int avail_int_regs = intRegFileSize - intRegUsed; + int avail_fp_regs = fpRegFileSize - fpRegUsed; + bool can_do_init = initAccessOk(avail_int_regs, avail_fp_regs, curTick()); + bool can_do_mid = interAccessOk(avail_int_regs, avail_fp_regs, curTick()); + bool can_do_ult = ultAccessOk(avail_int_regs, avail_fp_regs, curTick()); + if (!can_do_init && !can_do_mid && !can_do_ult && no_pending && no_queued) + { + assert(( + (mode == SpatterProcessingMode::synchronous) && + (state == SpatterGenState::DRAINING) + ) || + mode == SpatterProcessingMode::asynchronous + ); + state = SpatterGenState::WAITING; + exitSimLoop( + csprintf("%s received all expected responses.", name()), + 0, + nextCycle() + ); + } +} + +bool +SpatterGen::initAccessOk(int int_regs, int fp_regs, Tick when) const +{ + bool have_int_reg = int_regs > 0; + // for mode == SpatterProcessingMode::asynchronous state will always be 
+ // SpatterGenState::RUNNING. we don't have to do checks for mode. + // for mode == SpatterProcessingMode::synchronous, if state is + // SpatterGenState::DRAINING or SpatterGenState::WAITING + // we can't initiate any new indirect accesses. + bool have_kernel = !kernels.empty() && (state == SpatterGenState::RUNNING); + return have_kernel && have_int_reg; +} + +bool +SpatterGen::interAccessOk(int int_regs, int fp_regs, Tick when) const +{ + bool have_int_reg = int_regs > 0; + bool have_index = receiveBuffer.hasReady(when); + bool mid_idx = have_index && (receiveBuffer.front()->tripsLeft() > 1); + return mid_idx && have_int_reg; +} + +bool +SpatterGen::ultAccessOk(int int_regs, int fp_regs, Tick when) const +{ + bool have_fp_reg = fp_regs > 0; + bool have_index = receiveBuffer.hasReady(when); + bool val_idx = have_index && (receiveBuffer.front()->tripsLeft() == 1); + return val_idx && have_fp_reg; +} + +void +SpatterGen::scheduleNextGenEvent(Tick when) +{ + int avail_int_regs = intRegFileSize - intRegUsed; + int avail_fp_regs = fpRegFileSize - fpRegUsed; + bool have_work = initAccessOk(avail_int_regs, avail_fp_regs, curTick()) || + interAccessOk(avail_int_regs, avail_fp_regs, curTick()) || + ultAccessOk(avail_int_regs, avail_fp_regs, curTick()); + Tick schedule_tick = std::max(when, firstGeneratorAvailableTime); + if (have_work && (!nextGenEvent.scheduled())) { + schedule(nextGenEvent, schedule_tick); + firstGeneratorAvailableTime = MaxTick; + } +} + +void +SpatterGen::processNextGenEvent() +{ + assert(!nextGenEvent.pending()); + int req_buf_before = requestBuffer.size(); + // track changes to intRegUsed in this variable and apply it + // at the end of the for loop. This way if we free a register + // in the for loop, other iterations of the for loop won't + // observe this change. This matches what happens in real h/w. 
+ int int_used_now = 0; + // track this independently to prevent different iterations inside + // for loop observing change to h/w resources, i.e we can't rely + // intRegFileSize - intRegUsed to see if we have registers to allocate + // since they don't change until after the for loop + int int_regs_now = intRegFileSize - intRegUsed; + // same explanation as int_used_now + int fp_used_now = 0; + // same explanation as int_regs_now + int fp_regs_now = fpRegFileSize - fpRegUsed; + for (int i = 0; i < requestGenRate; i++) { + if (generatorBusyUntil[i] > curTick()) { + DPRINTF( + SpatterGen, + "%s: AGU[%d] is busy this cycle.\n", __func__, i + ); + continue; + } + if (!(requestBuffer.size() < requestBufferEntries)) { + // if no space left in the requestBuffer sleep + // whoever pops from requestBuffer wakes us up. + nextGenEvent.sleep(); + break; + } + // Now we know that AGU[i] is available and there is room + // in the requestBuffer to put the packet. + if (ultAccessOk(int_regs_now, fp_regs_now, curTick())) { + // occupy one fp register + fp_regs_now--; + fp_used_now++; + // make AGU busy for the next requestGenLatency cycles. + generatorBusyUntil[i] = clockEdge(Cycles(requestGenLatency)); + + // create a new packet to access + SpatterAccess* spatter_access = receiveBuffer.front(); + PacketPtr pkt = spatter_access->nextPacket(); + pkt->pushSenderState(spatter_access); + + // push to requestBuffer + requestBuffer.push(pkt, curTick()); + DPRINTF( + SpatterGen, + "%s: Pushed pkt: %s to requestBuffer.\n", + __func__, pkt->print() + ); + + // now deallocate resources for reading the index + int_used_now--; + receiveBuffer.pop(); + } else if (interAccessOk(int_regs_now, fp_regs_now, curTick())) { + // occupy one int register + int_regs_now--; + int_used_now++; + // make AGU busy for the next requestGenLatency cycles. 
+ generatorBusyUntil[i] = clockEdge(Cycles(requestGenLatency)); + + // create a new packet to access + SpatterAccess* spatter_access = receiveBuffer.front(); + PacketPtr pkt = spatter_access->nextPacket(); + pkt->pushSenderState(spatter_access); + + // push to requestBuffer + requestBuffer.push(pkt, curTick()); + DPRINTF( + SpatterGen, + "%s: Pushed pkt: %s to requestBuffer.\n", + __func__, pkt->print() + ); + + // now deallocate resources for reading the index + int_used_now--; + receiveBuffer.pop(); + } else if (initAccessOk(int_regs_now, fp_regs_now, curTick())) { + // occupy one int register + int_regs_now--; + int_used_now++; + generatorBusyUntil[i] = clockEdge(Cycles(requestGenLatency)); + + SpatterKernel& front = kernels.front(); + SpatterAccess* spatter_access = front.nextSpatterAccess(); + PacketPtr pkt = spatter_access->nextPacket(); + pkt->pushSenderState(spatter_access); + + requestBuffer.push(pkt, curTick()); + DPRINTF( + SpatterGen, + "%s: Pushed pkt: %s to requestBuffer.\n", + __func__, pkt->print() + ); + + if (front.done()) { + DPRINTF( + SpatterKernel, + "%s: Done with kernel %d type: %s.\n", + __func__, front.id(), + SpatterKernelTypeStrings[front.type()] + ); + kernels.pop(); + // If we're processing synchronously we now have to stop + // making initial accesses and wait for everyone to receive + // all expected responses. + if (mode == SpatterProcessingMode::synchronous) { + state = SpatterGenState::DRAINING; + } + } + } else { + // + DPRINTF( + SpatterGen, + "%s: Nothing more could be done this cycle.\n", __func__ + ); + DPRINTF(SpatterGen, "%s: Here is h/w status report: " + "{KERNELS_REMAIN: %d, INDEXES_REMAIN: %d, INT_REG_USED: %d, " + "FP_REG_USED: %d, REQ_BUFF_SIZE: %d}.\n", + __func__, kernels.size(), receiveBuffer.size(), + intRegUsed, fpRegUsed, requestBuffer.size()); + break; + } + } + + // update firstGeneratorAvailableTime after making all changes. 
+ for (int i = 0; i < requestGenRate; i++) { + generatorBusyUntil[i] = std::max(generatorBusyUntil[i], nextCycle()); + firstGeneratorAvailableTime = std::min( + firstGeneratorAvailableTime, + generatorBusyUntil[i] + ); + } + + // now that we have simulated all the work of this cycle, we can + // apply the deltas to the h/w resources. + intRegUsed += int_used_now; + fpRegUsed += fp_used_now; + + bool did_work = (requestBuffer.size() - req_buf_before) > 0; + if (did_work && (!nextSendEvent.pending())) { + scheduleNextSendEvent(nextCycle()); + } + + if (!nextGenEvent.pending()) { + scheduleNextGenEvent(firstGeneratorAvailableTime); + } +} + +void +SpatterGen::scheduleNextSendEvent(Tick when) +{ + bool have_work = !requestBuffer.empty(); + Tick schedule_tick = std::max(when, firstPortAvailableTime); + if (have_work && (!nextSendEvent.scheduled())) { + schedule(nextSendEvent, schedule_tick); + firstPortAvailableTime = MaxTick; + } +} + +void +SpatterGen::processNextSendEvent() +{ + int req_buf_before = requestBuffer.size(); + for (int i = 0; i < sendRate; i++) { + if (portBusyUntil[i] > curTick()) { + DPRINTF( + SpatterGen, + "%s: Port[%d] is busy this cycle.\n", __func__, i + ); + continue; + } + if (requestBuffer.empty()) { + DPRINTF( + SpatterGen, + "%s: No packets to send this cycle.\n", __func__ + ); + break; + } + if (!requestBuffer.hasReady(curTick())) { + DPRINTF( + SpatterGen, + "%s: Packet at front of requestBuffer not ready this cycle.\n", + __func__ + ); + break; + } + PacketPtr pkt = requestBuffer.front(); + DPRINTF( + SpatterGen, + "%s: Sending pkt: %s to port[%d].\n", + __func__, pkt->print(), i + ); + // NOTE: We assume the port will be busy for 1 cycle. 
+ portBusyUntil[i] = clockEdge(Cycles(1)); + // do the bookkeeping first; pkt should not be touched once sent + numPendingMemRequests++; + // record packet departure time + requestDepartureTime[pkt->req] = curTick(); + requestBuffer.pop(); + port.sendPacket(pkt); + // Now if we put the port in blocked state no point in continuing + // the loop. also no point in scheduling nextSendEvent. + if (port.blocked()) { + nextSendEvent.sleep(); + break; + } + } + // update firstPortAvailableTime after making all changes. + for (int i = 0; i < sendRate; i++) { + // if the port was not used this cycle, it's busy until nextCycle(). + portBusyUntil[i] = std::max(portBusyUntil[i], nextCycle()); + firstPortAvailableTime = std::min( + firstPortAvailableTime, + portBusyUntil[i] + ); + } + + bool did_work = (req_buf_before - requestBuffer.size()) > 0; + if (did_work && nextGenEvent.pending()) { + // since this event might open up space for output of nextGenEvent, + // it should wake it up if nextGenEvent is asleep. + nextGenEvent.wake(); + scheduleNextGenEvent(nextCycle()); + } + + if (!nextSendEvent.pending()) { + scheduleNextSendEvent(nextCycle()); + } +} + +SpatterGen::SpatterGenStats::SpatterGenStats(SpatterGen* spatter_gen): + statistics::Group(spatter_gen), spatterGen(spatter_gen), + ADD_STAT(numIndexReads, statistics::units::Count::get(), + "Number of reads from the indexer array."), + ADD_STAT(indexBytesRead, statistics::units::Byte::get(), + "Number of bytes read from the indexer array."), + ADD_STAT(totalIndexReadLatency, statistics::units::Tick::get(), + "Total latency for reading from the indexer array."), + ADD_STAT(numValueReads, statistics::units::Count::get(), + "Number of reads from the values array."), + ADD_STAT(numValueWrites, statistics::units::Count::get(), + "Number of writes to the values array."), + ADD_STAT(valueBytesRead, statistics::units::Byte::get(), + "Number of bytes read from the values array."), + ADD_STAT(valueBytesWritten, statistics::units::Byte::get(), + 
"Number of bytes 
written to the values array."), + ADD_STAT(totalValueReadLatency, statistics::units::Tick::get(), + "Total latency for reading from the values array."), + ADD_STAT(totalValueWriteLatency, statistics::units::Tick::get(), + "Total latency for writing to the values array."), + ADD_STAT(indexAccessLatency, statistics::units::Tick::get(), + "Distribution of latency for accessing the indexer array."), + ADD_STAT(valueAccessLatency, statistics::units::Tick::get(), + "Distribution of latency for accessing the values array."), + ADD_STAT(totalIndirectAccessLatency, statistics::units::Tick::get(), + "Distribution of total latency for indirect accesses.") +{} + +void +SpatterGen::SpatterGenStats::regStats() +{ + using namespace statistics; + indexAccessLatency.init(8); + valueAccessLatency.init(16); + totalIndirectAccessLatency.init(16); +} + +} // namespace gem5 diff --git a/src/cpu/testers/spatter_gen/spatter_gen.hh b/src/cpu/testers/spatter_gen/spatter_gen.hh new file mode 100644 index 0000000000..1b8a8dbb61 --- /dev/null +++ b/src/cpu/testers/spatter_gen/spatter_gen.hh @@ -0,0 +1,252 @@ +/* +* Copyright (c) 2024 The Regents of The University of California +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. 
+* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef __CPU_TESTERS_SPATTER_GEN_SPATTER_GEN_HH__ +#define __CPU_TESTERS_SPATTER_GEN_SPATTER_GEN_HH__ + +#include +#include +#include + +#include "base/statistics.hh" +#include "base/stats/group.hh" +#include "cpu/testers/spatter_gen/utility_structs.hh" +#include "enums/SpatterKernelType.hh" +#include "enums/SpatterProcessingMode.hh" +#include "mem/packet.hh" +#include "mem/port.hh" +#include "params/SpatterGen.hh" +#include "sim/clocked_object.hh" +#include "sim/eventq.hh" + +namespace gem5 +{ + + +/** + * @class SpatterGen + * @brief Spatter Kernel Player + * + * This class takes Spatter JSON traces and plays them back in gem5. + * Each trace includes a list of Spatter kernels, which are played in order. + * Kernels are either of type scatter or gather. + * At the time of writing, kernels represent accesses to the memory with + * one level of indirection. + * Initially, an access is made to an array which we call index from now on. + * The index array is streamed through with load accesses. + * In a high level programming language this access will be similar to below. + * "for (int i = 0; i < n; i++) { idx = index[i]; }". 
+ * The value at index[i] is then used to access another array which we will + * call value from now on. + * For scatter type kernels, a random value is stored in the location and + * for gather type kernels, the value is read from the location. + * In a high level programming language this access will be similar to below. + * Scatter + * "for (int i = 0; i < n; i++) { idx = index[i]; value[idx] = rand(); }". + * Gather + * "for (int i = 0; i < n; i++) { idx = index[i]; val = value[idx]; }". + * For more information you can take a look at + * https://github.com/hpcgarage/spatter/blob/main/README.md + * While the readme mentions MultiScatter and MultiGather kernels, the + * trace format is not finalized (at the time of writing). + */ +class SpatterGen: public ClockedObject +{ + private: + typedef enums::SpatterKernelType SpatterKernelType; + typedef enums::SpatterProcessingMode SpatterProcessingMode; + + class SpatterGenEvent : public EventFunctionWrapper + { + private: + // TODO: split pending into pendingInput and pendingOutput + enum class SleepState + { + AWAKE, + ASLEEP + }; + + SleepState _state; + + public: + SpatterGenEvent(const std::function &callback, + const std::string &name): + EventFunctionWrapper(callback, name), _state(SleepState::AWAKE) + {} + // a SpatterGenEvent will only be asleep if it is pending output + bool pending() const { return _state == SleepState::ASLEEP; } + void sleep() { _state = SleepState::ASLEEP; } + void wake() { _state = SleepState::AWAKE; } + }; + + class SpatterGenPort: public RequestPort + { + private: + SpatterGen* owner; + PacketPtr blockedPacket; + + public: + SpatterGenPort(SpatterGen* owner, const std::string& name): + RequestPort(name), owner(owner), blockedPacket(nullptr) {} + + void sendPacket(PacketPtr pkt); + bool blocked() const { return blockedPacket != nullptr; } + + protected: + virtual bool recvTimingResp(PacketPtr pkt) override; + virtual void recvReqRetry() override; + }; + + struct SpatterGenStats: public 
statistics::Group + { + SpatterGen* spatterGen; + + // TODO: When we enable multiple levels of indirection, we should + // convert this to a vector with one stat for each level of index + statistics::Scalar numIndexReads; + // TODO: When we enable multiple levels of indirection, we should + // convert this to a vector with one stat for each level of index + statistics::Scalar indexBytesRead; + statistics::Scalar totalIndexReadLatency; + + statistics::Scalar numValueReads; + statistics::Scalar numValueWrites; + statistics::Scalar valueBytesRead; + statistics::Scalar valueBytesWritten; + statistics::Scalar totalValueReadLatency; + statistics::Scalar totalValueWriteLatency; + + // TODO: When we enable multiple levels of indirection, we should + // convert this to a vector with one stat for each level of index + statistics::Histogram indexAccessLatency; + statistics::Histogram valueAccessLatency; + statistics::Histogram totalIndirectAccessLatency; + + virtual void regStats() override; + + SpatterGenStats(SpatterGen* spatter_gen); + }; + + enum class SpatterGenState + { + // waiting for all other cores to get to WAITING state, no accesses + WAITING, + // only creating intermediate and ultimate accesses, i.e. wrapping up + DRAINING, + // creating all kinds of accesses, initial, intermediate, and ultimate + RUNNING + }; + + // non param related members + SpatterGenState state; + std::queue kernels; + std::unordered_map requestDepartureTime; + + RequestorID requestorId; + int numPendingMemRequests; + + SpatterGenStats stats; + + void checkForSimExit(); + + bool initAccessOk(int int_regs, int fp_regs, Tick when) const; + bool interAccessOk(int int_regs, int fp_regs, Tick when) const; + bool ultAccessOk(int int_regs, int fp_regs, Tick when) const; + + // param related members (not necessarily one-to-one with params) + SpatterProcessingMode mode; + SpatterGenPort port; + // size of the register files, + // for every memory instruction we need to allocate one register. 
+ int intRegFileSize; + int intRegUsed; + int fpRegFileSize; + int fpRegUsed; + // latency to generate a request + int requestGenLatency; + // number of requests generated per event + int requestGenRate; + // tracking smallest tick when at least one "AGU" is available; + Tick firstGeneratorAvailableTime; + // tracking the busy state of our so-called "AGU"s. + std::vector generatorBusyUntil; + SpatterGenEvent nextGenEvent; + void processNextGenEvent(); + // put requests to the cache in the request buffer. + int requestBufferEntries; + // store request packets along with their insertion times in this queue. + TimedQueue requestBuffer; + // if nextGenEvent has to be scheduled at tick `when`, schedule it. + // this function should only be called when nextGenEvent is not pending. + void scheduleNextGenEvent(Tick when); + + // bandwidth to issue memory requests to cache, + // this is supposed to model the number of cache ports + // we will assume it takes 1 cycle to issue memory requests + int sendRate; + Tick firstPortAvailableTime; + std::vector portBusyUntil; + SpatterGenEvent nextSendEvent; + void processNextSendEvent(); + // if nextSendEvent has to be scheduled at tick `when`, schedule it. + // this function should only be called when nextSendEvent is not pending. + void scheduleNextSendEvent(Tick when); + + // put the memory responses here. + // no need to limit the size of this buffer. + // it's a response buffer and it will automatically + // be limited by requestBufferEntries, intRegFileSize, fpRegFileSize + TimedQueue receiveBuffer; + + public: + PARAMS(SpatterGen); + SpatterGen(const Params& params); + + Port& + getPort(const std::string& if_name, PortID idx = InvalidPortID) override; + + virtual void startup() override; + + void recvReqRetry(); + bool recvTimingResp(PacketPtr pkt); + + // PyBindMethod to interface adding a kernel with python JSON frontend. 
+ void addKernel( + uint32_t id, uint32_t delta, uint32_t count, + SpatterKernelType type, + size_t index_size, Addr base_index_addr, + size_t value_size, Addr base_value_addr, + const std::vector& indices + ); + + void proceedPastSyncPoint(); +}; + +} // namespace gem5 + +#endif // __CPU_TESTERS_SPATTER_GEN_SPATTER_GEN_HH__ diff --git a/src/cpu/testers/spatter_gen/utility_structs.hh b/src/cpu/testers/spatter_gen/utility_structs.hh new file mode 100644 index 0000000000..21bff9e8ae --- /dev/null +++ b/src/cpu/testers/spatter_gen/utility_structs.hh @@ -0,0 +1,242 @@ +/* +* Copyright (c) 2024 The Regents of The University of California +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions are +* met: redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer; +* redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution; +* neither the name of the copyright holders nor the names of its +* contributors may be used to endorse or promote products derived from +* this software without specific prior written permission. +* +* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +* A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef __CPU_TESTERS_SPATTER_GEN_UTILITY_STRUCTS_HH__ +#define __CPU_TESTERS_SPATTER_GEN_UTILITY_STRUCTS_HH__ + +#include +#include + +#include "base/random.hh" +#include "base/types.hh" +#include "enums/SpatterKernelType.hh" +#include "mem/packet.hh" + +namespace gem5 +{ + +template +class TimedQueue +{ + private: + Tick latency; + + std::queue items; + std::queue insertionTimes; + + public: + TimedQueue(Tick latency): latency(latency) {} + + void push(T item, Tick insertion_time) + { + items.push(item); + insertionTimes.push(insertion_time); + } + + void pop() + { + items.pop(); + insertionTimes.pop(); + } + + T front() const { return items.front(); } + + bool empty() const { return items.empty(); } + + size_t size() const { return items.size(); } + + bool hasReady(Tick current_time) const + { + if (empty()) { + return false; + } + return (current_time - insertionTimes.front()) >= latency; + } +}; + + + +// Represents a single access to a SpatterKernel. +// It supports multiple levels of indirection. +// However, the SpatterKernel class only works with one level of +// indirection (i.e. accessing value[index[i]]). 
+struct SpatterAccess : public Packet::SenderState +{ + typedef std::tuple AccessPair; + typedef enums::SpatterKernelType SpatterKernelType; + + RequestorID requestorId; + SpatterKernelType _kernelType; + Tick accTripTime; + std::queue accessPairs; + + SpatterAccess( + RequestorID requestor_id, + SpatterKernelType kernel_type, + const std::queue& access_pairs + ): + requestorId(requestor_id), _kernelType(kernel_type), + accTripTime(0), accessPairs(access_pairs) + {} + + SpatterKernelType type() const { return _kernelType; } + + int tripsLeft() const { return accessPairs.size(); } + + void recordTripTime(Tick trip_time) { accTripTime += trip_time; } + + Tick tripTimeSoFar() const { return accTripTime; } + + AccessPair nextAccessPair() + { + assert(tripsLeft() > 0); + AccessPair access_pair = accessPairs.front(); + accessPairs.pop(); + return access_pair; + } + + PacketPtr nextPacket() + { + Addr addr; + size_t size; + std::tie(addr, size) = nextAccessPair(); + MemCmd cmd; + if (tripsLeft() >= 1){ + cmd = MemCmd::ReadReq; + } else { + cmd = _kernelType == \ + SpatterKernelType::gather ? MemCmd::ReadReq : MemCmd::WriteReq; + } + return createPacket(addr, size, cmd); + } + + PacketPtr createPacket(Addr addr, size_t size, MemCmd cmd) const + { + RequestPtr req = std::make_shared(addr, size, 0, requestorId); + + // Dummy PC to have PC-based prefetchers latch on; + // get entropy into higher bits + // This piece of code is directly copied from + // gem5::TrafficGen:: + req->setPC(((Addr) requestorId) << 2); + PacketPtr pkt = new Packet(req, cmd); + uint8_t* pkt_data = new uint8_t[req->getSize()]; + // Randomly intialize pkt_data, for testing cache coherence. 
+ for (int i = 0; i < req->getSize(); i++) { + pkt_data[i] = random_mt.random(); + } + pkt->dataDynamic(pkt_data); + return pkt; + } +}; + +class SpatterKernel +{ + private: + typedef enums::SpatterKernelType SpatterKernelType; + typedef SpatterAccess::AccessPair AccessPair; + + RequestorID requestorId; + uint32_t _id; + uint32_t delta; + uint32_t count; + + SpatterKernelType _type; + + size_t indexSize; + Addr baseIndexAddr; + + size_t valueSize; + Addr baseValueAddr; + + // needed to iterate over indices multiple times. + uint32_t index; + // current iteration over indices + uint32_t iteration; + + // number of times we have left to roll indices to finish one iteration. + uint32_t remRolls; + std::deque indices; + + public: + + SpatterKernel( + RequestorID requestor_id, + uint32_t id, uint32_t delta, uint32_t count, + SpatterKernelType type, + size_t index_size, Addr base_index_addr, + size_t value_size, Addr base_value_addr + ): + requestorId(requestor_id), + _id(id), delta(delta), count(count), + _type(type), + indexSize(index_size), baseIndexAddr(base_index_addr), + valueSize(value_size), baseValueAddr(base_value_addr), + index(0), iteration(0), remRolls(0) + {} + + uint32_t id() const { return _id; } + + void setIndices(const std::vector& pattern) + { + indices.assign(pattern.begin(), pattern.end()); + remRolls = indices.size(); + } + + SpatterKernelType type() const { return _type; } + + bool done() const { return iteration == count; } + + SpatterAccess* nextSpatterAccess() + { + std::queue access_pairs; + Addr index_addr = baseIndexAddr + (index * indexSize); + access_pairs.emplace(index_addr, indexSize); + // update index in the index array + index++; + + uint32_t front = indices.front(); + uint32_t value_index = (delta * iteration) + front; + Addr value_addr = baseValueAddr + (value_index * valueSize); + access_pairs.emplace(value_addr, valueSize); + // roll indices + indices.pop_front(); + indices.push_back(front); + remRolls--; + if (remRolls == 0) { + 
remRolls = indices.size(); + iteration++; + } + + return new SpatterAccess(requestorId, _type, access_pairs); + } +}; + +} // namespace gem5 + +#endif // __CPU_TESTERS_SPATTER_GEN_UTILITY_STRUCTS_HH__ diff --git a/src/python/SConscript b/src/python/SConscript index fc2241fa09..af117e4a14 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -252,6 +252,14 @@ PySource('gem5.components.processors', 'gem5/components/processors/random_generator_core.py') PySource('gem5.components.processors', 'gem5/components/processors/random_generator.py') +PySource('gem5.components.processors.spatter_gen', + 'gem5/components/processors/spatter_gen/__init__.py') +PySource('gem5.components.processors.spatter_gen', + 'gem5/components/processors/spatter_gen/spatter_generator_core.py') +PySource('gem5.components.processors.spatter_gen', + 'gem5/components/processors/spatter_gen/spatter_generator.py') +PySource('gem5.components.processors.spatter_gen', + 'gem5/components/processors/spatter_gen/spatter_kernel.py') PySource('gem5.components.processors', 'gem5/components/processors/simple_core.py') PySource('gem5.components.processors', diff --git a/src/python/gem5/components/processors/spatter_gen/__init__.py b/src/python/gem5/components/processors/spatter_gen/__init__.py new file mode 100644 index 0000000000..3c1847b914 --- /dev/null +++ b/src/python/gem5/components/processors/spatter_gen/__init__.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +from .spatter_generator import SpatterGenerator +from .spatter_kernel import ( + SpatterKernel, + parse_kernel, + partition_trace, +) diff --git a/src/python/gem5/components/processors/spatter_gen/spatter_generator.py b/src/python/gem5/components/processors/spatter_gen/spatter_generator.py new file mode 100644 index 0000000000..72939f82dc --- /dev/null +++ b/src/python/gem5/components/processors/spatter_gen/spatter_generator.py @@ -0,0 +1,147 @@ +# Copyright (c) 2024 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
class SpatterGenerator(AbstractGenerator):
    """Processor built from one or more `SpatterGeneratorCore` objects.

    Kernels are handed to the cores with `add_kernel` (one kernel per core
    per call) and pushed to the underlying generators by `start_traffic`.
    `handle_spatter_exit` provides the exit-event handling that dumps and
    resets stats at every sync point.

    Args:
        num_cores: Number of `SpatterGeneratorCore` objects to create.
        processing_mode: Processing mode forwarded to every core. May be
            given either as a string or as a `SpatterProcessingMode`.
        int_regfile_size: Forwarded unchanged to every core.
        fp_regfile_size: Forwarded unchanged to every core.
        request_gen_latency: Forwarded unchanged to every core.
        request_gen_rate: Forwarded unchanged to every core.
        request_buffer_entries: Forwarded unchanged to every core.
        send_rate: Forwarded unchanged to every core.
        clk_freq: If given, a new `SrcClockDomain` with this clock is
            created and assigned to every core. If `None`, no clock domain
            is assigned here.
    """

    def __init__(
        self,
        num_cores: int = 1,
        processing_mode: Union[SpatterProcessingMode, str] = "synchronous",
        int_regfile_size: int = 384,
        fp_regfile_size: int = 224,
        request_gen_latency: int = 2,
        request_gen_rate: int = 4,
        request_buffer_entries: int = 32,
        send_rate: int = 2,
        clk_freq: Optional[str] = None,
    ) -> None:
        super().__init__(
            cores=self._create_cores(
                num_cores,
                processing_mode,
                int_regfile_size,
                fp_regfile_size,
                request_gen_latency,
                request_gen_rate,
                request_buffer_entries,
                send_rate,
            )
        )
        # No need for an else block since it will initialize
        # generator.clk_domain to the clock domain of its closest ancestor
        # in the SimObject tree.
        if clk_freq is not None:
            clock_domain = SrcClockDomain(
                clock=clk_freq, voltage_domain=VoltageDomain()
            )
            for generator in self.cores:
                generator.clk_domain = clock_domain

        # Number of add_kernel() calls so far, i.e. the number of sync
        # points expected in synchronous mode.
        self._num_kernels = 0
        # processing_mode may be a plain string or a SpatterProcessingMode
        # instance; comparing the instance itself against a string would
        # always be False, so compare its string form instead.
        self._sync = str(processing_mode) == "synchronous"

    def _create_cores(
        self,
        num_cores: int,
        processing_mode: Union[SpatterProcessingMode, str],
        int_regfile_size: int,
        fp_regfile_size: int,
        request_gen_latency: int,
        request_gen_rate: int,
        request_buffer_entries: int,
        send_rate: int,
    ) -> List[SpatterGeneratorCore]:
        """Return `num_cores` identically configured cores."""
        return [
            SpatterGeneratorCore(
                processing_mode,
                int_regfile_size,
                fp_regfile_size,
                request_gen_latency,
                request_gen_rate,
                request_buffer_entries,
                send_rate,
            )
            for _ in range(num_cores)
        ]

    def add_kernel(self, kernels: List[SpatterKernel]) -> None:
        """Queue one kernel on every core.

        Args:
            kernels: Exactly one `SpatterKernel` per core, in core order.
                Every kernel must be non-empty; an empty kernel is
                reported through `fatal`.
        """
        assert len(kernels) == len(self.cores), (
            f"Expected {len(self.cores)} kernels (one per core), "
            f"got {len(kernels)}."
        )
        for core, kernel in zip(self.cores, kernels):
            if kernel.empty():
                fatal(
                    f"Cannot add {kernel} since it's empty. "
                    "At the moment SpatterGenerator (or gem5::SpatterGen) "
                    "does not support adding empty kernels to cores. As a "
                    "temporary fix you can try adding 1 dummy element to the "
                    "trace. You can also set fix_empty_trace to True in the "
                    "constructor of the SpatterKernel which automatically "
                    "inserts a dummy element (0) to the trace."
                )
            core.add_kernel(kernel)
        # One call adds one kernel across all cores.
        self._num_kernels += 1

    @overrides(AbstractGenerator)
    def start_traffic(self) -> None:
        """Ask every core to start its traffic."""
        for core in self.cores:
            core.start_traffic()

    def _proceed_past_sync_point(self) -> None:
        """Release every core's generator from the sync point.

        Does nothing unless the processing mode is synchronous.
        """
        if not self._sync:
            return
        for core in self.cores:
            core.generator.proceedPastSyncPoint()

    def handle_spatter_exit(self):
        """Generator that handles spatter exit events.

        Each `next()` accounts for one spatter exit. Every time the number
        of observed exits reaches a multiple of the number of cores, one
        sync point is counted, stats are dumped and reset, and the cores
        are released past the sync point.

        Yields:
            bool: `True` once the expected number of sync points has been
            observed (the number of added kernels in synchronous mode,
            otherwise 1), `False` before that.
        """
        spatter_exits_observed = 0
        sync_points_observed = 0
        sync_points_expected = self._num_kernels if self._sync else 1
        while True:
            spatter_exits_observed += 1
            if spatter_exits_observed % len(self.cores) == 0:
                sync_points_observed += 1
                dump_stats()
                reset_stats()
                self._proceed_past_sync_point()
            yield not (sync_points_observed < sync_points_expected)
class SpatterGeneratorCore(AbstractGeneratorCore):
    """Generator core that wraps a single `SpatterGen` SimObject.

    Kernels are collected with `add_kernel` and only handed to the
    `SpatterGen` object when `start_traffic` is called.

    Args:
        processing_mode: Passed to `SpatterGen` as `processing_mode`.
        int_regfile_size: Passed to `SpatterGen` as `int_regfile_size`.
        fp_regfile_size: Passed to `SpatterGen` as `fp_regfile_size`.
        request_gen_latency: Passed to `SpatterGen` as
            `request_gen_latency`.
        request_gen_rate: Passed to `SpatterGen` as `request_gen_rate`.
        request_buffer_entries: Passed to `SpatterGen` as
            `request_buffer_entries`.
        send_rate: Passed to `SpatterGen` as `send_rate`.
    """

    def __init__(
        self,
        processing_mode: Union[SpatterProcessingMode, str],
        int_regfile_size: int,
        fp_regfile_size: int,
        request_gen_latency: int,
        request_gen_rate: int,
        request_buffer_entries: int,
        send_rate: int,
    ):
        super().__init__()
        generator_params = {
            "processing_mode": processing_mode,
            "int_regfile_size": int_regfile_size,
            "fp_regfile_size": fp_regfile_size,
            "request_gen_latency": request_gen_latency,
            "request_gen_rate": request_gen_rate,
            "request_buffer_entries": request_buffer_entries,
            "send_rate": send_rate,
        }
        self.generator = SpatterGen(**generator_params)
        # Kernels queued for this core, in insertion order.
        self._kernels = []

    @overrides(AbstractCore)
    def connect_dcache(self, port: Port) -> None:
        """Connect the generator's port to `port`."""
        self.generator.port = port

    def add_kernel(self, kernel: SpatterKernel) -> None:
        """Queue `kernel`; it reaches the generator in `start_traffic`."""
        self._kernels.append(kernel)

    def start_traffic(self) -> None:
        """Hand every queued kernel to the generator, in insertion order."""
        for queued_kernel in self._kernels:
            call_args = queued_kernel.cxx_call_args()
            self.generator.addKernel(*call_args)
def parse_kernel(
    kernel: dict, default_delta=8
) -> Tuple[int, int, "SpatterKernelType", List]:
    """Extract delta, count, type and index pattern from one trace entry.

    Args:
        kernel: One kernel entry of a spatter trace. The keys read are
            "delta", "count", "kernel" and "pattern".
        default_delta: Used when "delta" is missing or negative.

    Returns:
        A `(delta, count, type, trace)` tuple, where `type` is the
        `SpatterKernelType` built from the lower-cased "kernel" value and
        `trace` is the "pattern" list. `count` defaults to 1.

    Raises:
        ValueError: If "kernel" is missing or "pattern" is missing/empty.
    """
    delta = kernel.get("delta", default_delta)
    if delta < 0:
        inform(
            f"Negative delta found: {delta}. Setting it to {default_delta}."
        )
        delta = default_delta
    count = kernel.get("count", 1)
    kernel_type = kernel.get("kernel")
    if kernel_type is None:
        raise ValueError("Keyword 'kernel' not found.")
    kernel_type = SpatterKernelType(kernel_type.lower())
    trace = kernel.get("pattern", [])
    if len(trace) == 0:
        raise ValueError("Empty 'pattern' found.")
    return (delta, count, kernel_type, trace)


def partition_trace(
    original_trace: List[int], num_partitions: int, interleave_size: int
) -> List[List[int]]:
    """Split a trace into interleaved partitions.

    The trace is cut into consecutive chunks of `interleave_size` elements
    (the last chunk may be shorter) and the chunks are dealt round-robin to
    the partitions: chunk `i` goes to partition `i % num_partitions`.

    Args:
        original_trace: The full index pattern.
        num_partitions: Number of partitions to produce; must be >= 1.
        interleave_size: Elements per chunk; must be >= 1.

    Returns:
        A list of `num_partitions` lists. Partitions that receive no chunk
        are empty.

    Raises:
        ValueError: If `num_partitions` or `interleave_size` is less than 1.
    """
    # Without these checks a negative interleave_size silently yields empty
    # partitions and the other bad values fail with unrelated errors.
    if num_partitions < 1:
        raise ValueError(
            f"num_partitions must be at least 1, got {num_partitions}."
        )
    if interleave_size < 1:
        raise ValueError(
            f"interleave_size must be at least 1, got {interleave_size}."
        )

    partitions = [[] for _ in range(num_partitions)]
    chunk_starts = range(0, len(original_trace), interleave_size)
    for chunk_index, lower_bound in enumerate(chunk_starts):
        # Slicing clamps the upper bound to the end of the trace.
        chunk = original_trace[lower_bound : lower_bound + interleave_size]
        partitions[chunk_index % num_partitions].extend(chunk)
    return partitions


class SpatterKernel:
    """This class encapsulates one kernel in a spatter trace.
    A spatter trace is represented with a json file.
    An example of a spatter trace can be found here:
    https://github.com/hpcgarage/spatter/blob/main/standard-suite/app-traces/amg.json
    Each trace may have multiple kernels.
    Each kernel represents a code execution like below
        for (int iteration = 0; iteration < count; iteration++)
        {
            for (int i = 0; i < N; i++) {
                value[index[i] + iteration * delta] = rand(); // kernel: scatter
                // OR
                sum += value[index[i] + iteration * delta]; // kernel: gather
            }
        }
    Where `delta` and `count` are fields in each kernel.
    `kernel` is another field that determines whether the accesses to value
    are loads or stores.
    The field `pattern` stores the index array.

    This file provides two utility functions to parse spatter traces:
        parse_kernel: takes a dictionary and returns a tuple of
        delta, count, type, and trace.
        partition_trace: takes the original trace, number of partitions,
        and interleave_size.
        It returns a list of `num_partitions` partitions where each partition
        is a list including interleaved elements from `original_trace`.
        The elements in the `original_trace` are interleaved with a
        granularity of `interleave_size`.
    The code snippet below shows how to use these functions to create kernels.
        generator = SpatterGenerator(num_cores)

        with open(trace_path, "r") as trace_file:
            trace_entries = json.load(trace_file)

        for i, entry in enumerate(trace_entries):
            delta, count, kernel_type, og_trace = parse_kernel(entry)
            traces = partition_trace(og_trace, num_cores, 128)
            kernels = [SpatterKernel(
                                    kernel_id=i,
                                    kernel_delta=delta,
                                    kernel_count=count,
                                    kernel_type=kernel_type,
                                    kernel_trace=trace,
                                    index_size=4,
                                    base_index_addr=0,
                                    value_size=8,
                                    base_value_addr=0x400000000
                                    )
                        for trace in traces
                    ]
            generator.add_kernel(kernels)

    Args:
        kernel_id (int): The ID of the kernel.
        User defined, i.e. spatter traces don't have this field.
        It's used to identify the kernel in the simulation.
        kernel_delta (int): The delta value of the kernel.
        `delta` from spatter trace.
        kernel_count (int): The count value of the kernel.
        `count` from spatter trace.
        kernel_type (SpatterKernelType): The type of the kernel.
        `kernel` from spatter trace.
        kernel_trace (List[int]): The elements of the `index` array.
        `pattern` from spatter trace.
        index_size (int): The size of elements in `index`.
        User defined, i.e. spatter traces don't have this field.
        It represents the size of elements in the `index` array in code above.
        base_index_addr (Addr): The base address of the index.
        User defined, i.e. spatter traces don't have this field.
        It represents the pointer to the `index` array in the code above.
        value_size (int): The size of elements in `value`.
        User defined, i.e. spatter traces don't have this field.
        It represents the size of elements in the `value` array in code above.
        base_value_addr (Addr): The base address of the value.
        User defined, i.e. spatter traces don't have this field.
        It represents the pointer to the `value` array in the code above.
        fix_empty_trace (bool): If True and `kernel_trace` is empty, the
        trace is replaced with a single dummy element (0), delta is set
        to 0 and count is set to 1.
    """

    def __init__(
        self,
        kernel_id: int,
        kernel_delta: int,
        kernel_count: int,
        kernel_type: "SpatterKernelType",
        kernel_trace: List[int],
        index_size: int,
        base_index_addr: "Addr",
        value_size: int,
        base_value_addr: "Addr",
        fix_empty_trace: bool = False,
    ):
        self._id = kernel_id
        self._delta = kernel_delta
        self._count = kernel_count
        self._trace = kernel_trace
        self._type = kernel_type
        self._index_size = index_size
        self._base_index_addr = base_index_addr
        self._value_size = value_size
        self._base_value_addr = base_value_addr

        if fix_empty_trace and len(kernel_trace) == 0:
            inform(
                "Empty trace found. Fixing it by adding a dummy element. "
                "Also setting delta to 0 and count to 1.",
            )
            self._trace = [0]
            self._delta = 0
            self._count = 1

    def empty(self):
        """Return True if the kernel's trace has no elements."""
        return len(self._trace) == 0

    def cxx_call_args(self):
        """Return the kernel's fields as a positional argument list.

        The order is id, delta, count, type (converted with `getValue()`),
        index size, base index address, value size, base value address and
        trace.
        """
        return [
            self._id,
            self._delta,
            self._count,
            self._type.getValue(),
            self._index_size,
            self._base_index_addr,
            self._value_size,
            self._base_value_addr,
            self._trace,
        ]

    def __str__(self):
        # Only the first 8 trace elements are shown to keep messages short.
        return (
            f"SpatterKernel(id={self._id}, delta={self._delta}, "
            f"count={self._count}, type={self._type}, "
            f"trace[:8]={self._trace[:8]})"
        )
WORKEND = "workend" # An exit because a ROI has ended. + SPATTER_EXIT = "spatter exit" # An exit because a spatter core has ended. SWITCHCPU = "switchcpu" # An exit needed to switch CPU cores. FAIL = "fail" # An exit because the simulation has failed. CHECKPOINT = "checkpoint" # An exit to load a checkpoint. @@ -115,6 +116,8 @@ class ExitEvent(Enum): elif exit_string.endswith("is finished updating the memory.\n"): # This is for the gups generator exit event return ExitEvent.EXIT + elif exit_string.endswith("received all expected responses."): + return ExitEvent.SPATTER_EXIT raise NotImplementedError( f"Exit event '{exit_string}' not implemented" ) diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py index 4d18b4cee0..b237b064e2 100644 --- a/src/python/gem5/simulate/exit_event_generators.py +++ b/src/python/gem5/simulate/exit_event_generators.py @@ -36,6 +36,7 @@ from m5.util import warn from gem5.resources.looppoint import Looppoint from ..components.processors.abstract_processor import AbstractProcessor +from ..components.processors.spatter_gen import SpatterGenerator from ..components.processors.switchable_processor import SwitchableProcessor from ..resources.resource import SimpointResource @@ -221,3 +222,9 @@ def looppoint_save_checkpoint_generator( yield False yield True + + +def spatter_exit_generator(spatter_gen: SpatterGenerator): + while True: + assert isinstance(spatter_gen, SpatterGenerator) + yield from spatter_gen.handle_spatter_exit() diff --git a/src/python/gem5/simulate/simulator.py b/src/python/gem5/simulate/simulator.py index 5a5cf9af89..66f67d6ffb 100644 --- a/src/python/gem5/simulate/simulator.py +++ b/src/python/gem5/simulate/simulator.py @@ -53,6 +53,7 @@ from .exit_event_generators import ( reset_stats_generator, save_checkpoint_generator, skip_generator, + spatter_exit_generator, switch_generator, warn_default_decorator, ) @@ -281,6 +282,12 @@ class Simulator: "creating a 
checkpoint and continuing", )(), ExitEvent.FAIL: exit_generator(), + ExitEvent.SPATTER_EXIT: warn_default_decorator( + spatter_exit_generator, + "spatter exit", + "dumping and resetting stats after each sync point. " + "Note that there will be num_cores*sync_points spatter_exits.", + )(spatter_gen=board.get_processor()), ExitEvent.SWITCHCPU: warn_default_decorator( switch_generator, "switch CPU", @@ -518,9 +525,11 @@ class Simulator: self._board._pre_instantiate() root = Root( - full_system=self._full_system - if self._full_system is not None - else self._board.is_fullsystem(), + full_system=( + self._full_system + if self._full_system is not None + else self._board.is_fullsystem() + ), board=self._board, )