ruby: modifies histogram add() function
This patch modifies the Histogram class's add() function so that it can add linear histograms as well. The function assumes that the left end points of the ranges of the two histograms are the same. It also assumes that when the ranges of the two histograms are changed to accommodate an element not in the range, the factor used in changing the range is the same for both histograms. This function is then used in removing one of the calls to the global profiler*. The histograms for recording the delays incurred in processing different requests are now maintained by the controllers. The profiler adds these histograms when it needs to print the stats.
This commit is contained in:
@@ -34,11 +34,10 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
Histogram::Histogram(int binsize, int bins)
|
||||
Histogram::Histogram(int binsize, uint32_t bins)
|
||||
{
|
||||
m_binsize = binsize;
|
||||
m_bins = bins;
|
||||
clear();
|
||||
clear(bins);
|
||||
}
|
||||
|
||||
Histogram::~Histogram()
|
||||
@@ -46,29 +45,43 @@ Histogram::~Histogram()
|
||||
}
|
||||
|
||||
void
|
||||
Histogram::clear(int binsize, int bins)
|
||||
Histogram::clear(int binsize, uint32_t bins)
|
||||
{
|
||||
m_binsize = binsize;
|
||||
clear(bins);
|
||||
}
|
||||
|
||||
void
|
||||
Histogram::clear(int bins)
|
||||
Histogram::clear(uint32_t bins)
|
||||
{
|
||||
m_bins = bins;
|
||||
m_largest_bin = 0;
|
||||
m_max = 0;
|
||||
m_data.resize(m_bins);
|
||||
for (int i = 0; i < m_bins; i++) {
|
||||
m_data.resize(bins);
|
||||
for (uint32_t i = 0; i < bins; i++) {
|
||||
m_data[i] = 0;
|
||||
}
|
||||
|
||||
m_count = 0;
|
||||
m_max = 0;
|
||||
|
||||
m_sumSamples = 0;
|
||||
m_sumSquaredSamples = 0;
|
||||
}
|
||||
|
||||
void
|
||||
Histogram::doubleBinSize()
|
||||
{
|
||||
assert(m_binsize != -1);
|
||||
uint32_t t_bins = m_data.size();
|
||||
|
||||
for (uint32_t i = 0; i < t_bins/2; i++) {
|
||||
m_data[i] = m_data[i*2] + m_data[i*2 + 1];
|
||||
}
|
||||
for (uint32_t i = t_bins/2; i < t_bins; i++) {
|
||||
m_data[i] = 0;
|
||||
}
|
||||
|
||||
m_binsize *= 2;
|
||||
}
|
||||
|
||||
void
|
||||
Histogram::add(int64 value)
|
||||
@@ -80,7 +93,8 @@ Histogram::add(int64 value)
|
||||
m_sumSamples += value;
|
||||
m_sumSquaredSamples += (value*value);
|
||||
|
||||
int index;
|
||||
uint32_t index;
|
||||
|
||||
if (m_binsize == -1) {
|
||||
// This is a log base 2 histogram
|
||||
if (value == 0) {
|
||||
@@ -93,37 +107,59 @@ Histogram::add(int64 value)
|
||||
}
|
||||
} else {
|
||||
// This is a linear histogram
|
||||
while (m_max >= (m_bins * m_binsize)) {
|
||||
for (int i = 0; i < m_bins/2; i++) {
|
||||
m_data[i] = m_data[i*2] + m_data[i*2 + 1];
|
||||
}
|
||||
for (int i = m_bins/2; i < m_bins; i++) {
|
||||
m_data[i] = 0;
|
||||
}
|
||||
m_binsize *= 2;
|
||||
}
|
||||
uint32_t t_bins = m_data.size();
|
||||
|
||||
while (m_max >= (t_bins * m_binsize)) doubleBinSize();
|
||||
index = value/m_binsize;
|
||||
}
|
||||
assert(index >= 0);
|
||||
|
||||
assert(index < m_data.size());
|
||||
m_data[index]++;
|
||||
m_largest_bin = max(m_largest_bin, index);
|
||||
}
|
||||
|
||||
void
|
||||
Histogram::add(const Histogram& hist)
|
||||
Histogram::add(Histogram& hist)
|
||||
{
|
||||
assert(hist.getBins() == m_bins);
|
||||
assert(hist.getBinSize() == -1); // assume log histogram
|
||||
assert(m_binsize == -1);
|
||||
uint32_t t_bins = m_data.size();
|
||||
|
||||
for (int j = 0; j < hist.getData(0); j++) {
|
||||
add(0);
|
||||
if (hist.getBins() != t_bins) {
|
||||
fatal("Histograms with different number of bins cannot be combined!");
|
||||
}
|
||||
|
||||
for (int i = 1; i < m_bins; i++) {
|
||||
for (int j = 0; j < hist.getData(i); j++) {
|
||||
add(1<<(i-1)); // account for the + 1 index
|
||||
m_max = max(m_max, hist.getMax());
|
||||
m_count += hist.size();
|
||||
m_sumSamples += hist.getTotal();
|
||||
m_sumSquaredSamples += hist.getSquaredTotal();
|
||||
|
||||
// Both histograms are log base 2.
|
||||
if (hist.getBinSize() == -1 && m_binsize == -1) {
|
||||
for (int j = 0; j < hist.getData(0); j++) {
|
||||
add(0);
|
||||
}
|
||||
|
||||
for (uint32_t i = 1; i < t_bins; i++) {
|
||||
for (int j = 0; j < hist.getData(i); j++) {
|
||||
add(1<<(i-1)); // account for the + 1 index
|
||||
}
|
||||
}
|
||||
} else if (hist.getBinSize() >= 1 && m_binsize >= 1) {
|
||||
// Both the histogram are linear.
|
||||
// We are assuming that the two histograms have the same
|
||||
// minimum value that they can store.
|
||||
|
||||
while (m_binsize > hist.getBinSize()) hist.doubleBinSize();
|
||||
while (hist.getBinSize() > m_binsize) doubleBinSize();
|
||||
|
||||
assert(m_binsize == hist.getBinSize());
|
||||
|
||||
for (uint32_t i = 0; i < t_bins; i++) {
|
||||
m_data[i] += hist.getData(i);
|
||||
|
||||
if (m_data[i] > 0) m_largest_bin = i;
|
||||
}
|
||||
} else {
|
||||
fatal("Don't know how to combine log and linear histograms!");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,7 +213,8 @@ Histogram::printWithMultiplier(ostream& out, double multiplier) const
|
||||
<< " | ";
|
||||
out << "standard deviation: " << getStandardDeviation() << " |";
|
||||
}
|
||||
for (int i = 0; i < m_bins && i <= m_largest_bin; i++) {
|
||||
|
||||
for (uint32_t i = 0; i <= m_largest_bin; i++) {
|
||||
if (multiplier == 1.0) {
|
||||
out << " " << m_data[i];
|
||||
} else {
|
||||
|
||||
@@ -37,34 +37,38 @@
|
||||
class Histogram
|
||||
{
|
||||
public:
|
||||
Histogram(int binsize = 1, int bins = 50);
|
||||
Histogram(int binsize = 1, uint32_t bins = 50);
|
||||
~Histogram();
|
||||
|
||||
void add(int64 value);
|
||||
void add(const Histogram& hist);
|
||||
void clear() { clear(m_bins); }
|
||||
void clear(int bins);
|
||||
void clear(int binsize, int bins);
|
||||
int64 size() const { return m_count; }
|
||||
int getBins() const { return m_bins; }
|
||||
void add(Histogram& hist);
|
||||
void doubleBinSize();
|
||||
|
||||
void clear() { clear(m_data.size()); }
|
||||
void clear(uint32_t bins);
|
||||
void clear(int binsize, uint32_t bins);
|
||||
|
||||
uint64_t size() const { return m_count; }
|
||||
uint32_t getBins() const { return m_data.size(); }
|
||||
int getBinSize() const { return m_binsize; }
|
||||
int64 getTotal() const { return m_sumSamples; }
|
||||
int64 getData(int index) const { return m_data[index]; }
|
||||
uint64_t getSquaredTotal() const { return m_sumSquaredSamples; }
|
||||
uint64_t getData(int index) const { return m_data[index]; }
|
||||
int64 getMax() const { return m_max; }
|
||||
|
||||
void printWithMultiplier(std::ostream& out, double multiplier) const;
|
||||
void printPercent(std::ostream& out) const;
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
private:
|
||||
std::vector<int64> m_data;
|
||||
std::vector<uint64_t> m_data;
|
||||
int64 m_max; // the maximum value seen so far
|
||||
int64 m_count; // the number of elements added
|
||||
uint64_t m_count; // the number of elements added
|
||||
int m_binsize; // the size of each bucket
|
||||
int m_bins; // the number of buckets
|
||||
int m_largest_bin; // the largest bin used
|
||||
uint32_t m_largest_bin; // the largest bin used
|
||||
|
||||
int64 m_sumSamples; // the sum of all samples
|
||||
int64 m_sumSquaredSamples; // the sum of the square of all samples
|
||||
uint64_t m_sumSquaredSamples; // the sum of the square of all samples
|
||||
|
||||
double getStandardDeviation() const;
|
||||
};
|
||||
|
||||
@@ -65,7 +65,7 @@ class Network : public ClockedObject
|
||||
|
||||
virtual void init();
|
||||
|
||||
static int getNumberOfVirtualNetworks() { return m_virtual_networks; }
|
||||
static uint32_t getNumberOfVirtualNetworks() { return m_virtual_networks; }
|
||||
static uint32_t MessageSizeType_to_int(MessageSizeType size_type);
|
||||
|
||||
// returns the queue requested for the given component
|
||||
|
||||
@@ -223,6 +223,38 @@ Profiler::printRequestProfile(ostream &out)
|
||||
out << endl;
|
||||
}
|
||||
|
||||
void
|
||||
Profiler::printDelayProfile(ostream &out)
|
||||
{
|
||||
out << "Message Delayed Cycles" << endl;
|
||||
out << "----------------------" << endl;
|
||||
|
||||
uint32_t numVNets = Network::getNumberOfVirtualNetworks();
|
||||
Histogram delayHistogram;
|
||||
std::vector<Histogram> delayVCHistogram(numVNets);
|
||||
|
||||
for (uint32_t i = 0; i < MachineType_NUM; i++) {
|
||||
for (map<uint32_t, AbstractController*>::iterator it =
|
||||
g_abs_controls[i].begin();
|
||||
it != g_abs_controls[i].end(); ++it) {
|
||||
|
||||
AbstractController *ctr = (*it).second;
|
||||
delayHistogram.add(ctr->getDelayHist());
|
||||
|
||||
for (uint32_t i = 0; i < numVNets; i++) {
|
||||
delayVCHistogram[i].add(ctr->getDelayVCHist(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out << "Total_delay_cycles: " << delayHistogram << endl;
|
||||
|
||||
for (int i = 0; i < numVNets; i++) {
|
||||
out << " virtual_network_" << i << "_delay_cycles: "
|
||||
<< delayVCHistogram[i] << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Profiler::printStats(ostream& out, bool short_stats)
|
||||
{
|
||||
@@ -435,16 +467,7 @@ Profiler::printStats(ostream& out, bool short_stats)
|
||||
}
|
||||
|
||||
out << endl;
|
||||
out << "Message Delayed Cycles" << endl;
|
||||
out << "----------------------" << endl;
|
||||
out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl;
|
||||
out << "Total_nonPF_delay_cycles: "
|
||||
<< m_delayedCyclesNonPFHistogram << endl;
|
||||
for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
|
||||
out << " virtual_network_" << i << "_delay_cycles: "
|
||||
<< m_delayedCyclesVCHistograms[i] << endl;
|
||||
}
|
||||
|
||||
printDelayProfile(out);
|
||||
printResourceUsage(out);
|
||||
}
|
||||
}
|
||||
@@ -488,14 +511,6 @@ Profiler::clearStats()
|
||||
|
||||
m_busyBankCount = 0;
|
||||
|
||||
m_delayedCyclesHistogram.clear();
|
||||
m_delayedCyclesNonPFHistogram.clear();
|
||||
int size = Network::getNumberOfVirtualNetworks();
|
||||
m_delayedCyclesVCHistograms.resize(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
m_delayedCyclesVCHistograms[i].clear();
|
||||
}
|
||||
|
||||
m_missLatencyHistograms.resize(RubyRequestType_NUM);
|
||||
for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
|
||||
m_missLatencyHistograms[i].clear(200);
|
||||
@@ -593,17 +608,6 @@ Profiler::profileSharing(const Address& addr, AccessType type,
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
|
||||
{
|
||||
assert(virtualNetwork < m_delayedCyclesVCHistograms.size());
|
||||
m_delayedCyclesHistogram.add(delayCycles);
|
||||
m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);
|
||||
if (virtualNetwork != 0) {
|
||||
m_delayedCyclesNonPFHistogram.add(delayCycles);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Profiler::profilePFWait(Time waitTime)
|
||||
{
|
||||
|
||||
@@ -152,8 +152,6 @@ class Profiler : public SimObject
|
||||
|
||||
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
|
||||
|
||||
void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
|
||||
|
||||
void print(std::ostream& out) const;
|
||||
|
||||
void rubyWatch(int proc);
|
||||
@@ -172,6 +170,7 @@ class Profiler : public SimObject
|
||||
|
||||
private:
|
||||
void printRequestProfile(std::ostream &out);
|
||||
void printDelayProfile(std::ostream &out);
|
||||
|
||||
private:
|
||||
// Private copy constructor and assignment operator
|
||||
@@ -226,10 +225,6 @@ class Profiler : public SimObject
|
||||
std::vector<Histogram> m_SWPrefetchLatencyHistograms;
|
||||
std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
|
||||
|
||||
Histogram m_delayedCyclesHistogram;
|
||||
Histogram m_delayedCyclesNonPFHistogram;
|
||||
std::vector<Histogram> m_delayedCyclesVCHistograms;
|
||||
|
||||
Histogram m_outstanding_requests;
|
||||
Histogram m_outstanding_persistent_requests;
|
||||
|
||||
|
||||
@@ -52,6 +52,14 @@ AbstractController::clearStats()
|
||||
{
|
||||
m_requestProfileMap.clear();
|
||||
m_request_count = 0;
|
||||
|
||||
m_delayHistogram.clear();
|
||||
|
||||
uint32_t size = Network::getNumberOfVirtualNetworks();
|
||||
m_delayVCHistogram.resize(size);
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
m_delayVCHistogram[i].clear();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -63,3 +71,11 @@ AbstractController::profileRequest(const std::string &request)
|
||||
// default value which is 0
|
||||
m_requestProfileMap[request]++;
|
||||
}
|
||||
|
||||
void
|
||||
AbstractController::profileMsgDelay(uint32_t virtualNetwork, Time delay)
|
||||
{
|
||||
assert(virtualNetwork < m_delayVCHistogram.size());
|
||||
m_delayHistogram.add(delay);
|
||||
m_delayVCHistogram[virtualNetwork].add(delay);
|
||||
}
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "mem/ruby/common/Address.hh"
|
||||
#include "mem/ruby/common/Consumer.hh"
|
||||
#include "mem/ruby/common/DataBlock.hh"
|
||||
#include "mem/ruby/common/Histogram.hh"
|
||||
#include "mem/ruby/network/Network.hh"
|
||||
#include "mem/ruby/recorder/CacheRecorder.hh"
|
||||
#include "mem/ruby/system/MachineID.hh"
|
||||
@@ -92,9 +93,15 @@ class AbstractController : public ClockedObject, public Consumer
|
||||
const std::map<std::string, uint64_t>& getRequestProfileMap() const
|
||||
{ return m_requestProfileMap; }
|
||||
|
||||
Histogram& getDelayHist() { return m_delayHistogram; }
|
||||
Histogram& getDelayVCHist(uint32_t index)
|
||||
{ return m_delayVCHistogram[index]; }
|
||||
|
||||
protected:
|
||||
//! Profiles original cache requests including PUTs
|
||||
void profileRequest(const std::string &request);
|
||||
//! Profiles the delay associated with messages.
|
||||
void profileMsgDelay(uint32_t virtualNetwork, Time delay);
|
||||
|
||||
protected:
|
||||
int m_transitions_per_cycle;
|
||||
@@ -121,6 +128,11 @@ class AbstractController : public ClockedObject, public Consumer
|
||||
//! call requisite function for updating the count.
|
||||
std::map<std::string, uint64_t> m_requestProfileMap;
|
||||
uint64_t m_request_count;
|
||||
|
||||
//! Histogram for profiling delay for the messages this controller
|
||||
//! cares for
|
||||
Histogram m_delayHistogram;
|
||||
std::vector<Histogram> m_delayVCHistogram;
|
||||
};
|
||||
|
||||
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__
|
||||
|
||||
@@ -55,12 +55,6 @@ profile_sharing(const Address& addr, AccessType type, NodeID requestor,
|
||||
profileSharing(addr, type, requestor, sharers, owner);
|
||||
}
|
||||
|
||||
void
|
||||
profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
|
||||
{
|
||||
g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles);
|
||||
}
|
||||
|
||||
void
|
||||
profileGetX(const Address& datablock, const Address& PC, const Set& owner,
|
||||
const Set& sharers, NodeID requestor)
|
||||
|
||||
@@ -51,7 +51,6 @@ void profile_token_retry(const Address& addr, AccessType type, int count);
|
||||
void profile_filter_action(int action);
|
||||
void profile_persistent_prediction(const Address& addr, AccessType type);
|
||||
void profile_average_latency_estimate(int latency);
|
||||
void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
|
||||
|
||||
void profile_multicast_retry(const Address& addr, int count);
|
||||
void profileGetX(const Address& datablock, const Address& PC, const Set& owner,
|
||||
|
||||
Reference in New Issue
Block a user