From 1415308d103f73085239791f6578885fce315d37 Mon Sep 17 00:00:00 2001 From: Kyle Roarty Date: Wed, 14 Jul 2021 15:53:57 -0500 Subject: [PATCH] mem-ruby: Account for misaligned accesses in GPUCoalescer Previously, we assumed that the maximum number of requests that would be issued by an instruction was equal to the number of threads that were active for that instruction. However, if a thread has an access that crosses a cache line, that thread has a misaligned access, and needs to request both cache lines. This patch takes that into account by checking the status vector for each thread in that instruction to determine the number of requests. Change-Id: I1994962c46d504b48654dbd22bcd786c9f382fd9 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/48341 Tested-by: kokoro Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair --- src/mem/ruby/system/GPUCoalescer.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index c00e7c0986..2390ba6c47 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -645,7 +645,10 @@ GPUCoalescer::makeRequest(PacketPtr pkt) // of the exec_mask. int num_packets = 1; if (!m_usingRubyTester) { - num_packets = getDynInst(pkt)->exec_mask.count(); + num_packets = 0; + for (int i = 0; i < TheGpuISA::NumVecElemPerVecReg; i++) { + num_packets += getDynInst(pkt)->getLaneStatus(i); + } } // the pkt is temporarily stored in the uncoalesced table until