From 7ff1e381c98faeafd02a90269969d3c59cee94df Mon Sep 17 00:00:00 2001 From: Mahyar Samani Date: Thu, 20 Jun 2024 11:24:44 -0700 Subject: [PATCH] cpu,stdlib: Fix Access Trace for Accessing Indices in SpatterGen (#1258) This change fixes the way indices are generated in a multi-generator setup. It changes it from all cores generating the same trace of indices for accessing the index array to each core generating an interleaved subset of indices. For example, see the traces below (indices into the index array) for a 2-core setup. Before: core_0: 0, 1, 2, 3, 4, 5, 6, 7, ... core_1: 0, 1, 2, 3, 4, 5, 6, 7, ... After: core_0: 0, 1, 2, 3, 8, 9, 10, 11, ... core_1: 4, 5, 6, 7, 12, 13, 14, 15, ... Additionally, this change fixes the SpatterKernel class in the standard library to comply with the change in the SpatterGen source code. --- src/cpu/testers/spatter_gen/spatter_gen.cc | 2 + src/cpu/testers/spatter_gen/spatter_gen.hh | 2 +- .../testers/spatter_gen/utility_structs.hh | 41 ++++++++++++++++--- .../processors/spatter_gen/spatter_kernel.py | 13 +++++- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/cpu/testers/spatter_gen/spatter_gen.cc b/src/cpu/testers/spatter_gen/spatter_gen.cc index b57259911b..fb9e6427dd 100644 --- a/src/cpu/testers/spatter_gen/spatter_gen.cc +++ b/src/cpu/testers/spatter_gen/spatter_gen.cc @@ -205,6 +205,7 @@ void SpatterGen::addKernel( uint32_t id, uint32_t delta, uint32_t count, SpatterKernelType type, + uint32_t base_index, uint32_t indices_per_stride, uint32_t stride, size_t index_size, Addr base_index_addr, size_t value_size, Addr base_value_addr, const std::vector& indices @@ -218,6 +219,7 @@ SpatterGen::addKernel( SpatterKernel new_kernel( requestorId, id, delta, count, type, + base_index, indices_per_stride, stride, index_size, base_index_addr, value_size, base_value_addr ); diff --git a/src/cpu/testers/spatter_gen/spatter_gen.hh b/src/cpu/testers/spatter_gen/spatter_gen.hh index 1b8a8dbb61..b296ab8934 100644 ---
a/src/cpu/testers/spatter_gen/spatter_gen.hh +++ b/src/cpu/testers/spatter_gen/spatter_gen.hh @@ -234,11 +234,11 @@ class SpatterGen: public ClockedObject void recvReqRetry(); bool recvTimingResp(PacketPtr pkt); - // PyBindMethod to interface adding a kernel with python JSON frontend. void addKernel( uint32_t id, uint32_t delta, uint32_t count, SpatterKernelType type, + uint32_t base_index, uint32_t indices_per_stride, uint32_t stride, size_t index_size, Addr base_index_addr, size_t value_size, Addr base_value_addr, const std::vector& indices diff --git a/src/cpu/testers/spatter_gen/utility_structs.hh b/src/cpu/testers/spatter_gen/utility_structs.hh index 21bff9e8ae..d64cd481c5 100644 --- a/src/cpu/testers/spatter_gen/utility_structs.hh +++ b/src/cpu/testers/spatter_gen/utility_structs.hh @@ -161,7 +161,38 @@ class SpatterKernel typedef enums::SpatterKernelType SpatterKernelType; typedef SpatterAccess::AccessPair AccessPair; + class IndexGen + { + private: + uint32_t indicesPerStride; + uint32_t stride; + + uint32_t next; + public: + IndexGen(): indicesPerStride(0), stride(0), next(0) + {} + + IndexGen(uint32_t base_index, + uint32_t indices_per_stride, + uint32_t stride_size): + indicesPerStride(indices_per_stride), + stride(stride_size), next(base_index) + {} + + uint32_t nextIndex() { + uint32_t ret = next; + // update next index + next++; + if (next % indicesPerStride == 0) { + next += (stride - indicesPerStride); + } + return ret; + } + }; + RequestorID requestorId; + IndexGen indexGen; + uint32_t _id; uint32_t delta; uint32_t count; @@ -174,8 +205,6 @@ class SpatterKernel size_t valueSize; Addr baseValueAddr; - // needed to iterate over indices multiple times. 
- uint32_t index; // current iteration over indices uint32_t iteration; @@ -189,15 +218,17 @@ class SpatterKernel RequestorID requestor_id, uint32_t id, uint32_t delta, uint32_t count, SpatterKernelType type, + uint32_t base_index, uint32_t indices_per_stride, uint32_t stride, size_t index_size, Addr base_index_addr, size_t value_size, Addr base_value_addr ): requestorId(requestor_id), + indexGen(base_index, indices_per_stride, stride), _id(id), delta(delta), count(count), _type(type), indexSize(index_size), baseIndexAddr(base_index_addr), valueSize(value_size), baseValueAddr(base_value_addr), - index(0), iteration(0), remRolls(0) + iteration(0), remRolls(0) {} uint32_t id() const { return _id; } @@ -215,10 +246,10 @@ class SpatterKernel SpatterAccess* nextSpatterAccess() { std::queue access_pairs; + // get the next index for the index array + uint32_t index = indexGen.nextIndex(); Addr index_addr = baseIndexAddr + (index * indexSize); access_pairs.emplace(index_addr, indexSize); - // update index in the index array - index++; uint32_t front = indices.front(); uint32_t value_index = (delta * iteration) + front; diff --git a/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py b/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py index 4cf0ee814a..8b79b685c5 100644 --- a/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py +++ b/src/python/gem5/components/processors/spatter_gen/spatter_kernel.py @@ -150,22 +150,28 @@ class SpatterKernel: kernel_delta: int, kernel_count: int, kernel_type: SpatterKernelType, - kernel_trace: List[int], + base_index: int, + indices_per_stride: int, + stride_size: int, index_size: int, base_index_addr: Addr, value_size: int, base_value_addr: Addr, + kernel_trace: List[int], fix_empty_trace: bool = False, ): self._id = kernel_id self._delta = kernel_delta self._count = kernel_count - self._trace = kernel_trace self._type = kernel_type + self._base_index = base_index + self._indices_per_stride 
= indices_per_stride + self._stride_size = stride_size self._index_size = index_size self._base_index_addr = base_index_addr self._value_size = value_size self._base_value_addr = base_value_addr + self._trace = kernel_trace if fix_empty_trace and len(kernel_trace) == 0: inform( @@ -185,6 +191,9 @@ class SpatterKernel: self._delta, self._count, self._type.getValue(), + self._base_index, + self._indices_per_stride, + self._stride_size, self._index_size, self._base_index_addr, self._value_size,