From e957a882ed80e176105cbacb6ef5db76d851e07f Mon Sep 17 00:00:00 2001 From: Jarvis Jia Date: Tue, 18 Jun 2024 22:19:44 -0500 Subject: [PATCH] gpu-compute,mem-ruby: Add RubyHitMiss flag for TCP and TCC cache Add hit and miss print for TCP and TCC cache with RubyHitMiss debug flag Change-Id: I40ae3449020b917f39ac91d29fa4e1dd7c791e7b --- src/mem/ruby/protocol/GPU_VIPER-TCC.sm | 2 + src/mem/ruby/protocol/GPU_VIPER-TCP.sm | 10 ++--- src/mem/ruby/protocol/GPU_VIPER-msg.sm | 12 +++--- src/mem/ruby/system/GPUCoalescer.cc | 51 +++++++++++++++----------- src/mem/ruby/system/GPUCoalescer.hh | 22 ++++++----- 5 files changed, 56 insertions(+), 41 deletions(-) diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index da4318bcf9..69a4cb8c73 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -655,10 +655,12 @@ machine(MachineType:TCC, "TCC Cache") action(p_profileMiss, "pm", desc="Profile cache miss") { L2cache.profileDemandMiss(); + DPRINTF(RubyHitMiss, "in TCC miss at %#lx\n", address); } action(p_profileHit, "ph", desc="Profile cache hit") { L2cache.profileDemandHit(); + DPRINTF(RubyHitMiss, "in TCC hit at %#lx\n", address); } action(t_allocateTBE, "t", desc="allocate TBE Entry") { diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 0d740ef473..bd32d280cd 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -379,7 +379,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, tmp, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, tmp); + coalescer.readCallback(address, MachineType:L1Cache, tmp, false); } if(is_valid(cache_entry)) { unset_cache_entry(); @@ -404,7 +404,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, cache_entry.DataBlk, 
false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk, false); } } else { enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { @@ -565,7 +565,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, cache_entry.DataBlk, true, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk, true); } } @@ -574,7 +574,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk, false); } } @@ -587,7 +587,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, tmp, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, tmp); + coalescer.readCallback(address, MachineType:L1Cache, tmp, false); } } } diff --git a/src/mem/ruby/protocol/GPU_VIPER-msg.sm b/src/mem/ruby/protocol/GPU_VIPER-msg.sm index 106433f2c5..020fff18a6 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-msg.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-msg.sm @@ -31,11 +31,11 @@ structure (GPUCoalescer, external = "yes") { void readCallback(Addr, DataBlock); - void readCallback(Addr, MachineType, DataBlock); - void readCallback(Addr, MachineType, DataBlock, - Cycles, Cycles, Cycles); + void readCallback(Addr, MachineType, DataBlock, bool); void readCallback(Addr, MachineType, DataBlock, Cycles, Cycles, Cycles, bool); + void readCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles, bool, bool); void writeCallback(Addr, DataBlock); void 
writeCallback(Addr, MachineType, DataBlock); void writeCallback(Addr, MachineType, DataBlock, @@ -49,11 +49,11 @@ structure (GPUCoalescer, external = "yes") { structure (VIPERCoalescer, external = "yes") { void readCallback(Addr, DataBlock); - void readCallback(Addr, MachineType, DataBlock); - void readCallback(Addr, MachineType, DataBlock, - Cycles, Cycles, Cycles); + void readCallback(Addr, MachineType, DataBlock, bool); void readCallback(Addr, MachineType, DataBlock, Cycles, Cycles, Cycles, bool); + void readCallback(Addr, MachineType, DataBlock, + Cycles, Cycles, Cycles, bool, bool); void writeCallback(Addr, DataBlock); void writeCallback(Addr, MachineType, DataBlock); void writeCallback(Addr, MachineType, DataBlock, diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 5ee4105597..c037d127f7 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -38,6 +38,7 @@ #include "debug/GPUCoalescer.hh" #include "debug/MemoryAccess.hh" #include "debug/ProtocolTrace.hh" +#include "debug/RubyHitMiss.hh" #include "debug/RubyPort.hh" #include "debug/RubyStats.hh" #include "gpu-compute/shader.hh" @@ -438,7 +439,7 @@ GPUCoalescer::writeCallback(Addr address, auto crequest = coalescedTable.at(address).front(); hitCallback(crequest, mach, data, true, crequest->getIssueTime(), - forwardRequestTime, firstResponseTime, isRegion); + forwardRequestTime, firstResponseTime, isRegion, false); // remove this crequest in coalescedTable delete crequest; @@ -485,29 +486,16 @@ GPUCoalescer::writeCompleteCallback(Addr address, void GPUCoalescer::readCallback(Addr address, DataBlock& data) { - readCallback(address, MachineType_NULL, data); -} - -void -GPUCoalescer::readCallback(Addr address, - MachineType mach, - DataBlock& data) -{ - readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0)); + readCallback(address, MachineType_NULL, data, false); } void GPUCoalescer::readCallback(Addr address, MachineType 
mach, DataBlock& data, - Cycles initialRequestTime, - Cycles forwardRequestTime, - Cycles firstResponseTime) + bool externalHit = false) { - - readCallback(address, mach, data, - initialRequestTime, forwardRequestTime, firstResponseTime, - false); + readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0), externalHit); } void @@ -517,7 +505,23 @@ GPUCoalescer::readCallback(Addr address, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion) + bool externalHit = false) +{ + + readCallback(address, mach, data, + initialRequestTime, forwardRequestTime, firstResponseTime, + false, externalHit); +} + +void +GPUCoalescer::readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion, + bool externalHit = false) { assert(address == makeLineAddress(address)); assert(coalescedTable.count(address)); @@ -527,7 +531,7 @@ GPUCoalescer::readCallback(Addr address, "readCallback received non-read type response\n"); hitCallback(crequest, mach, data, true, crequest->getIssueTime(), - forwardRequestTime, firstResponseTime, isRegion); + forwardRequestTime, firstResponseTime, isRegion, externalHit); delete crequest; coalescedTable.at(address).pop_front(); @@ -547,7 +551,8 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion) + bool isRegion, + bool externalHit = false) { PacketPtr pkt = crequest->getFirstPkt(); Addr request_address = pkt->getAddr(); @@ -558,6 +563,10 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, DPRINTF(GPUCoalescer, "Got hitCallback for 0x%X\n", request_line_address); + DPRINTF(RubyHitMiss, "GPU TCP Cache %s at %#x\n", + externalHit ? 
"hit" : "miss", + printAddress(request_address)); + recordMissLatency(crequest, mach, initialRequestTime, forwardRequestTime, @@ -956,7 +965,7 @@ GPUCoalescer::atomicCallback(Addr address, "atomicCallback saw non-atomic type response\n"); hitCallback(crequest, mach, (DataBlock&)data, true, - crequest->getIssueTime(), Cycles(0), Cycles(0), false); + crequest->getIssueTime(), Cycles(0), Cycles(0), false, false); delete crequest; coalescedTable.at(address).pop_front(); diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index d6db5c00ba..42efe41cb7 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -289,16 +289,10 @@ class GPUCoalescer : public RubyPort void readCallback(Addr address, DataBlock& data); - void readCallback(Addr address, - MachineType mach, - DataBlock& data); - void readCallback(Addr address, MachineType mach, DataBlock& data, - Cycles initialRequestTime, - Cycles forwardRequestTime, - Cycles firstResponseTime); + bool externalHit); void readCallback(Addr address, MachineType mach, @@ -306,7 +300,16 @@ class GPUCoalescer : public RubyPort Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion); + bool externalHit); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion, + bool externalHit); /* atomics need their own callback because the data might be const coming from SLICC */ @@ -392,7 +395,8 @@ class GPUCoalescer : public RubyPort Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion); + bool isRegion, + bool externalHit); void recordMissLatency(CoalescedRequest* crequest, MachineType mach, Cycles initialRequestTime,