ruby: Reincarnated the responding machine profiling

This patch adds back to ruby the capability to understand the response time
for messages that hit in different levels of the cache hierarchy.
Specifically, it adds support for the MI_example, MOESI_hammer, and MOESI_CMP_token
protocols.
This commit is contained in:
Brad Beckmann
2010-08-20 11:46:12 -07:00
parent 9fb4381ddc
commit 4b4e725921
8 changed files with 210 additions and 55 deletions

View File

@@ -121,6 +121,17 @@ machine(L1Cache, "MI Example L1 Cache")
}
}
// Maps the sender of a response to the GenericMachineType that the external
// hit actions hand to the sequencer callbacks.
// A sender whose machine type is L1Cache is reported as L1Cache_wCC; any
// other sender's machine type is converted with ConvertMachToGenericMach.
GenericMachineType getNondirectHitMachType(MachineID sender) {
if (machineIDToMachineType(sender) == MachineType:L1Cache) {
//
// NOTE direct local hits should not call this
//
return GenericMachineType:L1Cache_wCC;
} else {
return ConvertMachToGenericMach(machineIDToMachineType(sender));
}
}
// NETWORK PORTS
@@ -263,14 +274,35 @@ machine(L1Cache, "MI Example L1 Cache")
action(r_load_hit, "r", desc="Notify sequencer the load completed.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.readCallback(address, getCacheEntry(address).DataBlk);
sequencer.readCallback(address,
GenericMachineType:L1Cache,
getCacheEntry(address).DataBlk);
}
// Load completion driven by the message at the head of responseNetwork_in.
// Passes the sequencer the responder's generic machine type (derived from
// in_msg.Sender) together with the cache entry's data block.
action(rx_load_hit, "rx", desc="External load completed.") {
peek(responseNetwork_in, ResponseMsg) {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.readCallback(address,
getNondirectHitMachType(in_msg.Sender),
getCacheEntry(address).DataBlk);
}
}
action(s_store_hit, "s", desc="Notify sequencer that store completed.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.writeCallback(address, getCacheEntry(address).DataBlk);
sequencer.writeCallback(address,
GenericMachineType:L1Cache,
getCacheEntry(address).DataBlk);
}
// Store completion driven by the message at the head of responseNetwork_in.
// Passes the sequencer the responder's generic machine type (derived from
// in_msg.Sender) together with the cache entry's data block.
action(sx_store_hit, "sx", desc="External store completed.") {
peek(responseNetwork_in, ResponseMsg) {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.writeCallback(address,
getNondirectHitMachType(in_msg.Sender),
getCacheEntry(address).DataBlk);
}
}
action(u_writeDataToCache, "u", desc="Write data to the cache") {
peek(responseNetwork_in, ResponseMsg) {
@@ -342,14 +374,14 @@ machine(L1Cache, "MI Example L1 Cache")
transition(IS, Data, M) {
u_writeDataToCache;
r_load_hit;
rx_load_hit;
w_deallocateTBE;
n_popResponseQueue;
}
transition(IM, Data, M) {
u_writeDataToCache;
s_store_hit;
sx_store_hit;
w_deallocateTBE;
n_popResponseQueue;
}

View File

@@ -374,24 +374,27 @@ machine(L1Cache, "Token protocol")
}
}
// GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
// if (machineIDToMachineType(sender) == MachineType:L1Cache) {
// return GenericMachineType:L1Cache_wCC; // NOTE direct L1 hits should not call this
// } else if (machineIDToMachineType(sender) == MachineType:L2Cache) {
//
// if (sender == (mapAddressToRange(addr,
// MachineType:L2Cache,
// l2_select_low_bit,
// l2_select_num_bits))) {
//
// return GenericMachineType:L2Cache;
// } else {
// return GenericMachineType:L2Cache_wCC;
// }
// } else {
// return ConvertMachToGenericMach(machineIDToMachineType(sender));
// }
// }
// Maps the sender of a response for addr to the GenericMachineType handed to
// the sequencer callbacks:
//   - L1Cache sender                      -> L1Cache_wCC
//   - L2Cache sender equal to the result of mapAddressToRange(addr, ...)
//                                         -> L2Cache
//   - any other L2Cache sender            -> L2Cache_wCC
//   - anything else                       -> ConvertMachToGenericMach(...)
// NOTE(review): mapAddressToRange presumably yields the L2 bank that addr
// maps to (selected by l2_select_low_bit / l2_select_num_bits) -- confirm.
GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
if (machineIDToMachineType(sender) == MachineType:L1Cache) {
//
// NOTE direct local hits should not call this
//
return GenericMachineType:L1Cache_wCC;
} else if (machineIDToMachineType(sender) == MachineType:L2Cache) {
if (sender == (mapAddressToRange(addr,
MachineType:L2Cache,
l2_select_low_bit,
l2_select_num_bits))) {
return GenericMachineType:L2Cache;
} else {
return GenericMachineType:L2Cache_wCC;
}
} else {
return ConvertMachToGenericMach(machineIDToMachineType(sender));
}
}
bool okToIssueStarving(Address addr, MachineID machinID) {
return persistentTable.okToIssueStarving(addr, machineID);
@@ -1136,8 +1139,11 @@ machine(L1Cache, "Token protocol")
action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
DEBUG_EXPR(address);
DEBUG_EXPR(getCacheEntry(address).DataBlk);
//sequencer.readCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No);
sequencer.readCallback(address, getCacheEntry(address).DataBlk);
sequencer.readCallback(address,
GenericMachineType:L1Cache,
getCacheEntry(address).DataBlk);
}
action(x_external_load_hit, "x", desc="Notify sequencer the load completed.") {
@@ -1145,16 +1151,21 @@ machine(L1Cache, "Token protocol")
DEBUG_EXPR(getCacheEntry(address).DataBlk);
peek(responseNetwork_in, ResponseMsg) {
//sequencer.readCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No);
sequencer.readCallback(address, getCacheEntry(address).DataBlk);
sequencer.readCallback(address,
getNondirectHitMachType(address, in_msg.Sender),
getCacheEntry(address).DataBlk);
}
}
action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") {
DEBUG_EXPR(address);
DEBUG_EXPR(getCacheEntry(address).DataBlk);
//sequencer.writeCallback(address, getCacheEntry(address).DataBlk, GenericMachineType:L1Cache, PrefetchBit:No);
sequencer.writeCallback(address, getCacheEntry(address).DataBlk);
sequencer.writeCallback(address,
GenericMachineType:L1Cache,
getCacheEntry(address).DataBlk);
getCacheEntry(address).Dirty := true;
DEBUG_EXPR(getCacheEntry(address).DataBlk);
}
@@ -1163,8 +1174,11 @@ machine(L1Cache, "Token protocol")
DEBUG_EXPR(address);
DEBUG_EXPR(getCacheEntry(address).DataBlk);
peek(responseNetwork_in, ResponseMsg) {
//sequencer.writeCallback(address, getCacheEntry(address).DataBlk, getNondirectHitMachType(in_msg.Address, in_msg.Sender), PrefetchBit:No);
sequencer.writeCallback(address, getCacheEntry(address).DataBlk);
sequencer.writeCallback(address,
getNondirectHitMachType(address, in_msg.Sender),
getCacheEntry(address).DataBlk);
}
getCacheEntry(address).Dirty := true;
DEBUG_EXPR(getCacheEntry(address).DataBlk);

View File

@@ -114,6 +114,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
State CacheState, desc="cache state";
bool Dirty, desc="Is the data dirty (different than memory)?";
DataBlock DataBlk, desc="data for the block";
bool FromL2, default="false", desc="block just moved from L2";
}
// TBE fields
@@ -123,6 +124,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
bool Dirty, desc="Is the data dirty (different than memory)?";
int NumPendingMsgs, desc="Number of acks/data messages that this processor is waiting for";
bool Sharers, desc="On a GetS, did we find any other sharers in the system";
MachineID LastResponder, desc="last machine to send a response for this request";
}
external_type(TBETable) {
@@ -214,6 +216,26 @@ machine(L1Cache, "AMD Hammer-like protocol")
}
}
// Maps the sender of a response to the GenericMachineType handed to the
// sequencer callbacks. An L1Cache sender is reported as L1Cache_wCC; any
// other sender's machine type is converted with ConvertMachToGenericMach.
// The addr parameter is not consulted by this implementation.
GenericMachineType getNondirectHitMachType(Address addr, MachineID sender) {
if (machineIDToMachineType(sender) == MachineType:L1Cache) {
//
// NOTE direct local hits should not call this
//
return GenericMachineType:L1Cache_wCC;
} else {
return ConvertMachToGenericMach(machineIDToMachineType(sender));
}
}
// Classifies a local hit on addr and consumes the entry's FromL2 marker.
// If the cache entry's FromL2 flag is set, the flag is reset to false and
// L2Cache is returned; otherwise L1Cache is returned and nothing is modified.
GenericMachineType testAndClearLocalHit(Address addr) {
if (getCacheEntry(addr).FromL2) {
// Clear the marker so that a later hit on this entry reports L1Cache.
getCacheEntry(addr).FromL2 := false;
return GenericMachineType:L2Cache;
} else {
return GenericMachineType:L1Cache;
}
}
MessageBuffer triggerQueue, ordered="true";
// ** OUT_PORTS **
@@ -487,12 +509,54 @@ machine(L1Cache, "AMD Hammer-like protocol")
action(h_load_hit, "h", desc="Notify sequencer the load completed.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.readCallback(address, getCacheEntry(address).DataBlk);
sequencer.readCallback(address,
testAndClearLocalHit(address),
getCacheEntry(address).DataBlk);
}
// Load completion driven by the message at the head of responseToCache_in.
// Passes the sequencer the responder's generic machine type (derived from
// in_msg.Address and in_msg.Sender) together with the cache entry's data.
action(hx_external_load_hit, "hx", desc="load required external msgs") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
peek(responseToCache_in, ResponseMsg) {
sequencer.readCallback(address,
getNondirectHitMachType(in_msg.Address, in_msg.Sender),
getCacheEntry(address).DataBlk);
}
}
action(hh_store_hit, "\h", desc="Notify sequencer that store completed.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.writeCallback(address, getCacheEntry(address).DataBlk);
sequencer.writeCallback(address,
testAndClearLocalHit(address),
getCacheEntry(address).DataBlk);
getCacheEntry(address).Dirty := true;
}
// Store completion driven by the message at the head of responseToCache_in.
// Passes the sequencer the responder's generic machine type (derived from
// in_msg.Sender) together with the cache entry's data, then marks the cache
// entry dirty.
action(sx_external_store_hit, "sx", desc="store required external msgs.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
peek(responseToCache_in, ResponseMsg) {
sequencer.writeCallback(address,
getNondirectHitMachType(address, in_msg.Sender),
getCacheEntry(address).DataBlk);
}
getCacheEntry(address).Dirty := true;
}
// Store completion that does not peek a response message. The responder is
// taken from the LastResponder field of the TBE for this address, and the
// cache entry is marked dirty after the sequencer has been notified.
action(sxt_trig_ext_store_hit, "sxt", desc="store required external msgs.") {
DEBUG_EXPR(getCacheEntry(address).DataBlk);
sequencer.writeCallback(address,
getNondirectHitMachType(address,
TBEs[address].LastResponder),
getCacheEntry(address).DataBlk);
getCacheEntry(address).Dirty := true;
}
@@ -522,6 +586,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
DEBUG_EXPR(TBEs[address].NumPendingMsgs);
TBEs[address].NumPendingMsgs := TBEs[address].NumPendingMsgs - in_msg.Acks;
DEBUG_EXPR(TBEs[address].NumPendingMsgs);
TBEs[address].LastResponder := in_msg.Sender;
}
}
@@ -671,9 +736,11 @@ machine(L1Cache, "AMD Hammer-like protocol")
if (L1DcacheMemory.isTagPresent(address)) {
static_cast(Entry, L1DcacheMemory[address]).Dirty := static_cast(Entry, L2cacheMemory[address]).Dirty;
static_cast(Entry, L1DcacheMemory[address]).DataBlk := static_cast(Entry, L2cacheMemory[address]).DataBlk;
static_cast(Entry, L1DcacheMemory[address]).FromL2 := true;
} else {
static_cast(Entry, L1IcacheMemory[address]).Dirty := static_cast(Entry, L2cacheMemory[address]).Dirty;
static_cast(Entry, L1IcacheMemory[address]).DataBlk := static_cast(Entry, L2cacheMemory[address]).DataBlk;
static_cast(Entry, L1IcacheMemory[address]).FromL2 := true;
}
}
@@ -905,7 +972,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
u_writeDataToCache;
m_decrementNumberOfMessages;
o_checkForCompletion;
hh_store_hit;
sx_external_store_hit;
n_popResponseQueue;
}
@@ -941,7 +1008,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
}
transition(ISM, All_acks_no_sharers, MM) {
hh_store_hit;
sxt_trig_ext_store_hit;
g_sendUnblock;
s_deallocateTBE;
j_popTriggerQueue;
@@ -967,7 +1034,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
}
transition(OM, {All_acks, All_acks_no_sharers}, MM) {
hh_store_hit;
sxt_trig_ext_store_hit;
g_sendUnblock;
s_deallocateTBE;
j_popTriggerQueue;
@@ -997,7 +1064,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
u_writeDataToCache;
m_decrementNumberOfMessages;
o_checkForCompletion;
h_load_hit;
hx_external_load_hit;
n_popResponseQueue;
}
@@ -1005,7 +1072,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
u_writeDataToCache;
m_decrementNumberOfMessages;
o_checkForCompletion;
h_load_hit;
hx_external_load_hit;
n_popResponseQueue;
}
@@ -1014,7 +1081,7 @@ machine(L1Cache, "AMD Hammer-like protocol")
r_setSharerBit;
m_decrementNumberOfMessages;
o_checkForCompletion;
h_load_hit;
hx_external_load_hit;
n_popResponseQueue;
}

View File

@@ -100,7 +100,9 @@ external_type(NetDest, non_obj="yes") {
external_type(Sequencer) {
void readCallback(Address, DataBlock);
void readCallback(Address, GenericMachineType, DataBlock);
void writeCallback(Address, DataBlock);
void writeCallback(Address, GenericMachineType, DataBlock);
void checkCoherence(Address);
void profileNack(Address, int, int, uint64);
}

View File

@@ -574,23 +574,27 @@ Profiler::bankBusy()
// non-zero cycle demand request
void
Profiler::missLatency(Time t, RubyRequestType type)
Profiler::missLatency(Time cycles,
RubyRequestType type,
const GenericMachineType respondingMach)
{
m_allMissLatencyHistogram.add(t);
m_missLatencyHistograms[type].add(t);
m_allMissLatencyHistogram.add(cycles);
m_missLatencyHistograms[type].add(cycles);
m_machLatencyHistograms[respondingMach].add(cycles);
}
// non-zero cycle prefetch request
void
Profiler::swPrefetchLatency(Time t, CacheRequestType type,
GenericMachineType respondingMach)
Profiler::swPrefetchLatency(Time cycles,
CacheRequestType type,
const GenericMachineType respondingMach)
{
m_allSWPrefetchLatencyHistogram.add(t);
m_SWPrefetchLatencyHistograms[type].add(t);
m_SWPrefetchMachLatencyHistograms[respondingMach].add(t);
m_allSWPrefetchLatencyHistogram.add(cycles);
m_SWPrefetchLatencyHistograms[type].add(cycles);
m_SWPrefetchMachLatencyHistograms[respondingMach].add(cycles);
if (respondingMach == GenericMachineType_Directory ||
respondingMach == GenericMachineType_NUM) {
m_SWPrefetchL2MissLatencyHistogram.add(t);
m_SWPrefetchL2MissLatencyHistogram.add(cycles);
}
}

View File

@@ -133,9 +133,15 @@ class Profiler : public SimObject, public Consumer
void controllerBusy(MachineID machID);
void bankBusy();
void missLatency(Time t, RubyRequestType type);
void swPrefetchLatency(Time t, CacheRequestType type,
GenericMachineType respondingMach);
void missLatency(Time t,
RubyRequestType type,
const GenericMachineType respondingMach);
void swPrefetchLatency(Time t,
CacheRequestType type,
const GenericMachineType respondingMach);
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
void profileTransition(const std::string& component, NodeID version,

View File

@@ -304,6 +304,14 @@ Sequencer::removeRequest(SequencerRequest* srequest)
// Overload without a responding-machine argument: forwards to the
// three-argument writeCallback with GenericMachineType_NULL as the machine.
void
Sequencer::writeCallback(const Address& address, DataBlock& data)
{
writeCallback(address, GenericMachineType_NULL, data);
}
void
Sequencer::writeCallback(const Address& address,
GenericMachineType mach,
DataBlock& data)
{
assert(address == line_address(address));
assert(m_writeRequestTable.count(line_address(address)));
@@ -329,11 +337,19 @@ Sequencer::writeCallback(const Address& address, DataBlock& data)
m_controller->unblock(address);
}
hitCallback(request, data);
hitCallback(request, mach, data);
}
// Overload without a responding-machine argument: forwards to the
// three-argument readCallback with GenericMachineType_NULL as the machine.
void
Sequencer::readCallback(const Address& address, DataBlock& data)
{
readCallback(address, GenericMachineType_NULL, data);
}
void
Sequencer::readCallback(const Address& address,
GenericMachineType mach,
DataBlock& data)
{
assert(address == line_address(address));
assert(m_readRequestTable.count(line_address(address)));
@@ -349,11 +365,13 @@ Sequencer::readCallback(const Address& address, DataBlock& data)
(request->ruby_request.type == RubyRequestType_RMW_Read) ||
(request->ruby_request.type == RubyRequestType_IFETCH));
hitCallback(request, data);
hitCallback(request, mach, data);
}
void
Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data)
Sequencer::hitCallback(SequencerRequest* srequest,
GenericMachineType mach,
DataBlock& data)
{
const RubyRequest & ruby_request = srequest->ruby_request;
Address request_address(ruby_request.paddr);
@@ -376,7 +394,7 @@ Sequencer::hitCallback(SequencerRequest* srequest, DataBlock& data)
// Profile the miss latency for all non-zero demand misses
if (miss_latency != 0) {
g_system_ptr->getProfiler()->missLatency(miss_latency, type);
g_system_ptr->getProfiler()->missLatency(miss_latency, type, mach);
if (Debug::getProtocolTrace()) {
g_system_ptr->getProfiler()->

View File

@@ -75,8 +75,17 @@ class Sequencer : public RubyPort, public Consumer
void printProgress(std::ostream& out) const;
void writeCallback(const Address& address, DataBlock& data);
void writeCallback(const Address& address,
GenericMachineType mach,
DataBlock& data);
void readCallback(const Address& address, DataBlock& data);
void readCallback(const Address& address,
GenericMachineType mach,
DataBlock& data);
RequestStatus makeRequest(const RubyRequest & request);
RequestStatus getRequestStatus(const RubyRequest& request);
bool empty() const;
@@ -94,7 +103,10 @@ class Sequencer : public RubyPort, public Consumer
int size, DataBlock*& data_ptr);
void issueRequest(const RubyRequest& request);
void hitCallback(SequencerRequest* request, DataBlock& data);
void hitCallback(SequencerRequest* request,
GenericMachineType mach,
DataBlock& data);
bool insertRequest(SequencerRequest* request);