diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index da4318bcf9..69a4cb8c73 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -655,10 +655,12 @@ machine(MachineType:TCC, "TCC Cache") action(p_profileMiss, "pm", desc="Profile cache miss") { L2cache.profileDemandMiss(); + DPRINTF(RubyHitMiss, "in TCC miss at %#lx\n", address); } action(p_profileHit, "ph", desc="Profile cache hit") { L2cache.profileDemandHit(); + DPRINTF(RubyHitMiss, "in TCC hit at %#lx\n", address); } action(t_allocateTBE, "t", desc="allocate TBE Entry") { diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm index 0d740ef473..5d98a73041 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCP.sm @@ -379,7 +379,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, tmp, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, tmp); + coalescer.readCallback(address, MachineType:L1Cache, tmp, false); } if(is_valid(cache_entry)) { unset_cache_entry(); @@ -404,7 +404,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk, false); } } else { enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { @@ -565,7 +565,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, cache_entry.DataBlk, true, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk, true); } } @@ -574,7 +574,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, cache_entry.DataBlk, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk); + coalescer.readCallback(address, MachineType:L1Cache, cache_entry.DataBlk, false); } } @@ -587,7 +587,7 @@ machine(MachineType:TCP, "GPU TCP (L1 Data Cache)") if (use_seq_not_coal) { sequencer.readCallback(address, tmp, false, MachineType:L1Cache); } else { - coalescer.readCallback(address, MachineType:L1Cache, tmp); + coalescer.readCallback(address, MachineType:L1Cache, tmp, false); } } } diff --git a/src/mem/ruby/protocol/GPU_VIPER-msg.sm b/src/mem/ruby/protocol/GPU_VIPER-msg.sm index 106433f2c5..566b0d94cd 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-msg.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-msg.sm @@ -31,11 +31,11 @@ structure (GPUCoalescer, external = "yes") { void readCallback(Addr, DataBlock); - void readCallback(Addr, MachineType, DataBlock); + void readCallback(Addr, MachineType, DataBlock, bool); void readCallback(Addr, MachineType, DataBlock, - Cycles, Cycles, Cycles); + Cycles, Cycles, Cycles, bool); void readCallback(Addr, MachineType, DataBlock, - Cycles, Cycles, Cycles, bool); + Cycles, Cycles, Cycles, bool, bool); void writeCallback(Addr, DataBlock); void writeCallback(Addr, MachineType, DataBlock); void writeCallback(Addr, MachineType, DataBlock, @@ -49,11 +49,11 @@ structure (GPUCoalescer, external = "yes") { structure (VIPERCoalescer, external = "yes") { void readCallback(Addr, DataBlock); - void readCallback(Addr, MachineType, DataBlock); + void readCallback(Addr, MachineType, DataBlock, bool); void readCallback(Addr, MachineType, DataBlock, - Cycles, Cycles, Cycles); + Cycles, Cycles, Cycles, bool); void readCallback(Addr, MachineType, DataBlock, - Cycles, Cycles, Cycles, bool); + Cycles, Cycles, Cycles, bool, bool); void writeCallback(Addr, DataBlock); void writeCallback(Addr, MachineType, DataBlock); void writeCallback(Addr, MachineType, DataBlock, diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc index 5ee4105597..90e57554c3 100644 --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -38,6 +38,7 @@ #include "debug/GPUCoalescer.hh" #include "debug/MemoryAccess.hh" #include "debug/ProtocolTrace.hh" +#include "debug/RubyHitMiss.hh" #include "debug/RubyPort.hh" #include "debug/RubyStats.hh" #include "gpu-compute/shader.hh" @@ -438,7 +439,7 @@ GPUCoalescer::writeCallback(Addr address, auto crequest = coalescedTable.at(address).front(); hitCallback(crequest, mach, data, true, crequest->getIssueTime(), - forwardRequestTime, firstResponseTime, isRegion); + forwardRequestTime, firstResponseTime, isRegion, false); // remove this crequest in coalescedTable delete crequest; @@ -485,29 +486,16 @@ GPUCoalescer::writeCompleteCallback(Addr address, void GPUCoalescer::readCallback(Addr address, DataBlock& data) { - readCallback(address, MachineType_NULL, data); -} - -void -GPUCoalescer::readCallback(Addr address, - MachineType mach, - DataBlock& data) -{ - readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0)); + readCallback(address, MachineType_NULL, data, false); } void GPUCoalescer::readCallback(Addr address, MachineType mach, DataBlock& data, - Cycles initialRequestTime, - Cycles forwardRequestTime, - Cycles firstResponseTime) + bool externalHit) { - - readCallback(address, mach, data, - initialRequestTime, forwardRequestTime, firstResponseTime, - false); + readCallback(address, mach, data, Cycles(0), Cycles(0), Cycles(0), externalHit); } void @@ -517,7 +505,23 @@ GPUCoalescer::readCallback(Addr address, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion) + bool externalHit) +{ + + readCallback(address, mach, data, + initialRequestTime, forwardRequestTime, firstResponseTime, + false, externalHit); +} + +void +GPUCoalescer::readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion, + bool externalHit) { assert(address == makeLineAddress(address)); assert(coalescedTable.count(address)); @@ -527,7 +531,7 @@ GPUCoalescer::readCallback(Addr address, "readCallback received non-read type response\n"); hitCallback(crequest, mach, data, true, crequest->getIssueTime(), - forwardRequestTime, firstResponseTime, isRegion); + forwardRequestTime, firstResponseTime, isRegion, externalHit); delete crequest; coalescedTable.at(address).pop_front(); @@ -547,7 +551,8 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion) + bool isRegion, + bool externalHit) { PacketPtr pkt = crequest->getFirstPkt(); Addr request_address = pkt->getAddr(); @@ -558,6 +563,10 @@ GPUCoalescer::hitCallback(CoalescedRequest* crequest, DPRINTF(GPUCoalescer, "Got hitCallback for 0x%X\n", request_line_address); + DPRINTF(RubyHitMiss, "GPU TCP Cache %s at %#x\n", + externalHit ? "hit" : "miss", + printAddress(request_address)); + recordMissLatency(crequest, mach, initialRequestTime, forwardRequestTime, @@ -956,7 +965,7 @@ GPUCoalescer::atomicCallback(Addr address, "atomicCallback saw non-atomic type response\n"); hitCallback(crequest, mach, (DataBlock&)data, true, - crequest->getIssueTime(), Cycles(0), Cycles(0), false); + crequest->getIssueTime(), Cycles(0), Cycles(0), false, false); delete crequest; coalescedTable.at(address).pop_front(); diff --git a/src/mem/ruby/system/GPUCoalescer.hh b/src/mem/ruby/system/GPUCoalescer.hh index d6db5c00ba..21b2335a0e 100644 --- a/src/mem/ruby/system/GPUCoalescer.hh +++ b/src/mem/ruby/system/GPUCoalescer.hh @@ -289,16 +289,10 @@ class GPUCoalescer : public RubyPort void readCallback(Addr address, DataBlock& data); - void readCallback(Addr address, - MachineType mach, - DataBlock& data); - void readCallback(Addr address, MachineType mach, DataBlock& data, - Cycles initialRequestTime, - Cycles forwardRequestTime, - Cycles firstResponseTime); + bool externalHit = false); void readCallback(Addr address, MachineType mach, @@ -306,7 +300,16 @@ class GPUCoalescer : public RubyPort Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion); + bool externalHit = false); + + void readCallback(Addr address, + MachineType mach, + DataBlock& data, + Cycles initialRequestTime, + Cycles forwardRequestTime, + Cycles firstResponseTime, + bool isRegion, + bool externalHit = false); /* atomics need their own callback because the data might be const coming from SLICC */ @@ -392,13 +395,17 @@ class GPUCoalescer : public RubyPort Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool isRegion); + bool isRegion, + bool externalHit = false); + void recordMissLatency(CoalescedRequest* crequest, MachineType mach, Cycles initialRequestTime, Cycles forwardRequestTime, Cycles firstResponseTime, - bool success, bool isRegion); + bool success, + bool isRegion); + void completeHitCallback(std::vector & mylist); virtual RubyRequestType getRequestType(PacketPtr pkt);