ruby: modifies histogram add() function

This patch modifies the Histogram class' add() function so that it can add
linear histograms as well. The function assumes that the left end points of
the ranges of the two histograms are the same. It also assumes that when
the ranges of the two histograms are changed to accommodate an element not in
the range, the factor used in changing the range is the same for both
histograms.

This function is then used in removing one of the calls to the global
profiler*. The histograms for recording the delays incurred in processing
different requests are now maintained by the controllers. The profiler
adds these histograms when it needs to print the stats.
This commit is contained in:
Nilay Vaish
2013-02-10 21:26:22 -06:00
parent a49b1df3f0
commit bc1daae7fd
9 changed files with 147 additions and 86 deletions

View File

@@ -34,11 +34,10 @@
using namespace std;
// Stores the requested bin size and then clears the histogram so that it
// starts out empty.
Histogram::Histogram(int binsize, int bins)
Histogram::Histogram(int binsize, uint32_t bins)
{
m_binsize = binsize;
m_bins = bins;
clear();
clear(bins);
}
Histogram::~Histogram()
@@ -46,29 +45,43 @@ Histogram::~Histogram()
}
// Resets the histogram and switches it to a new bin size; the actual
// reset is delegated to the single-argument clear().
void
Histogram::clear(int binsize, int bins)
Histogram::clear(int binsize, uint32_t bins)
{
m_binsize = binsize;
clear(bins);
}
// Resizes the bin storage to `bins` entries, zeroes every bin and resets
// the largest used bin, the maximum, the sample count and both sample
// sums. The bin size (m_binsize) is left untouched.
void
Histogram::clear(int bins)
Histogram::clear(uint32_t bins)
{
m_bins = bins;
m_largest_bin = 0;
m_max = 0;
m_data.resize(m_bins);
for (int i = 0; i < m_bins; i++) {
m_data.resize(bins);
// resize() keeps the values of existing entries, so zero them explicitly.
for (uint32_t i = 0; i < bins; i++) {
m_data[i] = 0;
}
m_count = 0;
m_max = 0;
m_sumSamples = 0;
m_sumSquaredSamples = 0;
}
// Doubles the width of every bin of a linear histogram in place.
//
// Adjacent pairs of bins are merged into the lower part of m_data and the
// vacated upper bins are zeroed; the number of bins does not change. Must
// not be called on a log base 2 histogram (m_binsize == -1).
void
Histogram::doubleBinSize()
{
    assert(m_binsize != -1);
    uint32_t t_bins = m_data.size();
    uint32_t half = t_bins / 2;

    for (uint32_t i = 0; i < half; i++) {
        m_data[i] = m_data[i*2] + m_data[i*2 + 1];
    }

    // With an odd number of bins the last bin has no partner. Its samples
    // belong to the lower half of merged bin `half`, so move them there
    // instead of discarding them when the upper bins are zeroed.
    uint32_t first_free = half;
    if (t_bins % 2 != 0) {
        m_data[half] = m_data[t_bins - 1];
        first_free = half + 1;
    }

    for (uint32_t i = first_free; i < t_bins; i++) {
        m_data[i] = 0;
    }

    m_binsize *= 2;
}
void
Histogram::add(int64 value)
@@ -80,7 +93,8 @@ Histogram::add(int64 value)
m_sumSamples += value;
m_sumSquaredSamples += (value*value);
int index;
uint32_t index;
if (m_binsize == -1) {
// This is a log base 2 histogram
if (value == 0) {
@@ -93,37 +107,59 @@ Histogram::add(int64 value)
}
} else {
// This is a linear histogram
while (m_max >= (m_bins * m_binsize)) {
for (int i = 0; i < m_bins/2; i++) {
m_data[i] = m_data[i*2] + m_data[i*2 + 1];
}
for (int i = m_bins/2; i < m_bins; i++) {
m_data[i] = 0;
}
m_binsize *= 2;
}
uint32_t t_bins = m_data.size();
while (m_max >= (t_bins * m_binsize)) doubleBinSize();
index = value/m_binsize;
}
assert(index >= 0);
assert(index < m_data.size());
m_data[index]++;
m_largest_bin = max(m_largest_bin, index);
}
// Merges the samples recorded in hist into this histogram. Both
// histograms must have the same number of bins and must be of the same
// kind (both log base 2, or both linear); anything else is fatal.
// The argument is not const: in the linear case its bin size may be
// doubled so that both histograms use the same bin width.
void
Histogram::add(const Histogram& hist)
Histogram::add(Histogram& hist)
{
assert(hist.getBins() == m_bins);
assert(hist.getBinSize() == -1); // assume log histogram
assert(m_binsize == -1);
uint32_t t_bins = m_data.size();
for (int j = 0; j < hist.getData(0); j++) {
add(0);
if (hist.getBins() != t_bins) {
fatal("Histograms with different number of bins cannot be combined!");
}
for (int i = 1; i < m_bins; i++) {
for (int j = 0; j < hist.getData(i); j++) {
add(1<<(i-1)); // account for the + 1 index
// Fold in the summary statistics of hist.
// NOTE(review): the log base 2 branch below replays the samples through
// add(value), which also updates m_sumSamples and m_sumSquaredSamples, so
// these statistics look like they are counted twice on that path - confirm.
m_max = max(m_max, hist.getMax());
m_count += hist.size();
m_sumSamples += hist.getTotal();
m_sumSquaredSamples += hist.getSquaredTotal();
// Both histograms are log base 2.
if (hist.getBinSize() == -1 && m_binsize == -1) {
for (int j = 0; j < hist.getData(0); j++) {
add(0);
}
for (uint32_t i = 1; i < t_bins; i++) {
for (int j = 0; j < hist.getData(i); j++) {
// NOTE(review): the shift is done on an int; presumably this overflows
// once i - 1 reaches the width of int - confirm against the bin counts
// used for log histograms.
add(1<<(i-1)); // account for the + 1 index
}
}
} else if (hist.getBinSize() >= 1 && m_binsize >= 1) {
// Both the histogram are linear.
// We are assuming that the two histograms have the same
// minimum value that they can store.
// Widen whichever histogram has the smaller bins until both match; note
// that this modifies the caller's hist object.
while (m_binsize > hist.getBinSize()) hist.doubleBinSize();
while (hist.getBinSize() > m_binsize) doubleBinSize();
assert(m_binsize == hist.getBinSize());
// Bin counts can now be added element-wise; the last non-empty bin seen
// becomes the largest used bin.
for (uint32_t i = 0; i < t_bins; i++) {
m_data[i] += hist.getData(i);
if (m_data[i] > 0) m_largest_bin = i;
}
} else {
fatal("Don't know how to combine log and linear histograms!");
}
}
@@ -177,7 +213,8 @@ Histogram::printWithMultiplier(ostream& out, double multiplier) const
<< " | ";
out << "standard deviation: " << getStandardDeviation() << " |";
}
for (int i = 0; i < m_bins && i <= m_largest_bin; i++) {
for (uint32_t i = 0; i <= m_largest_bin; i++) {
if (multiplier == 1.0) {
out << " " << m_data[i];
} else {

View File

@@ -37,34 +37,38 @@
// Histogram of integer samples. Bins are either linear with a width of
// m_binsize, or log base 2 when m_binsize is -1. Besides the per-bin
// counts it tracks the sample count, the maximum and the sums needed for
// the standard deviation.
class Histogram
{
public:
Histogram(int binsize = 1, int bins = 50);
Histogram(int binsize = 1, uint32_t bins = 50);
~Histogram();
void add(int64 value);
void add(const Histogram& hist);
void clear() { clear(m_bins); }
void clear(int bins);
void clear(int binsize, int bins);
int64 size() const { return m_count; }
int getBins() const { return m_bins; }
void add(Histogram& hist);
void doubleBinSize();
void clear() { clear(m_data.size()); }
void clear(uint32_t bins);
void clear(int binsize, uint32_t bins);
uint64_t size() const { return m_count; }
uint32_t getBins() const { return m_data.size(); }
int getBinSize() const { return m_binsize; }
int64 getTotal() const { return m_sumSamples; }
int64 getData(int index) const { return m_data[index]; }
uint64_t getSquaredTotal() const { return m_sumSquaredSamples; }
uint64_t getData(int index) const { return m_data[index]; }
int64 getMax() const { return m_max; }
void printWithMultiplier(std::ostream& out, double multiplier) const;
void printPercent(std::ostream& out) const;
void print(std::ostream& out) const;
private:
std::vector<int64> m_data;
std::vector<uint64_t> m_data;
int64 m_max; // the maximum value seen so far
int64 m_count; // the number of elements added
uint64_t m_count; // the number of elements added
int m_binsize; // the size of each bucket; -1 marks a log base 2 histogram
int m_bins; // the number of buckets
int m_largest_bin; // the largest bin used
uint32_t m_largest_bin; // the largest bin used
int64 m_sumSamples; // the sum of all samples
int64 m_sumSquaredSamples; // the sum of the square of all samples
uint64_t m_sumSquaredSamples; // the sum of the square of all samples
double getStandardDeviation() const;
};

View File

@@ -65,7 +65,7 @@ class Network : public ClockedObject
virtual void init();
static int getNumberOfVirtualNetworks() { return m_virtual_networks; }
static uint32_t getNumberOfVirtualNetworks() { return m_virtual_networks; }
static uint32_t MessageSizeType_to_int(MessageSizeType size_type);
// returns the queue requested for the given component

View File

@@ -223,6 +223,38 @@ Profiler::printRequestProfile(ostream &out)
out << endl;
}
// Prints the message delay histograms, summed over every controller.
//
// The controllers own the delay histograms (one overall and one per
// virtual network). They are accumulated into temporaries here and then
// written to `out`. Histogram::add() takes its argument by non-const
// reference and may widen the bins of a controller's linear histogram
// while combining.
void
Profiler::printDelayProfile(ostream &out)
{
    out << "Message Delayed Cycles" << endl;
    out << "----------------------" << endl;

    uint32_t numVNets = Network::getNumberOfVirtualNetworks();
    Histogram delayHistogram;
    std::vector<Histogram> delayVCHistogram(numVNets);

    // Walk every controller of every machine type.
    for (uint32_t type = 0; type < MachineType_NUM; type++) {
        map<uint32_t, AbstractController*>::iterator it =
            g_abs_controls[type].begin();

        while (it != g_abs_controls[type].end()) {
            AbstractController *ctr = (*it).second;
            delayHistogram.add(ctr->getDelayHist());

            for (uint32_t vnet = 0; vnet < numVNets; vnet++) {
                delayVCHistogram[vnet].add(ctr->getDelayVCHist(vnet));
            }
            ++it;
        }
    }

    out << "Total_delay_cycles: " << delayHistogram << endl;

    for (uint32_t vnet = 0; vnet < numVNets; vnet++) {
        out << " virtual_network_" << vnet << "_delay_cycles: "
            << delayVCHistogram[vnet] << endl;
    }
}
void
Profiler::printStats(ostream& out, bool short_stats)
{
@@ -435,16 +467,7 @@ Profiler::printStats(ostream& out, bool short_stats)
}
out << endl;
out << "Message Delayed Cycles" << endl;
out << "----------------------" << endl;
out << "Total_delay_cycles: " << m_delayedCyclesHistogram << endl;
out << "Total_nonPF_delay_cycles: "
<< m_delayedCyclesNonPFHistogram << endl;
for (int i = 0; i < m_delayedCyclesVCHistograms.size(); i++) {
out << " virtual_network_" << i << "_delay_cycles: "
<< m_delayedCyclesVCHistograms[i] << endl;
}
printDelayProfile(out);
printResourceUsage(out);
}
}
@@ -488,14 +511,6 @@ Profiler::clearStats()
m_busyBankCount = 0;
m_delayedCyclesHistogram.clear();
m_delayedCyclesNonPFHistogram.clear();
int size = Network::getNumberOfVirtualNetworks();
m_delayedCyclesVCHistograms.resize(size);
for (int i = 0; i < size; i++) {
m_delayedCyclesVCHistograms[i].clear();
}
m_missLatencyHistograms.resize(RubyRequestType_NUM);
for (int i = 0; i < m_missLatencyHistograms.size(); i++) {
m_missLatencyHistograms[i].clear(200);
@@ -593,17 +608,6 @@ Profiler::profileSharing(const Address& addr, AccessType type,
}
}
// Records one message delay sample for the given virtual network.
void
Profiler::profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
{
    assert(virtualNetwork < m_delayedCyclesVCHistograms.size());

    // Every sample goes into the overall histogram and into the histogram
    // of its own virtual network.
    m_delayedCyclesHistogram.add(delayCycles);
    m_delayedCyclesVCHistograms[virtualNetwork].add(delayCycles);

    // Samples from virtual network 0 are left out of the non-PF histogram.
    if (virtualNetwork == 0) {
        return;
    }
    m_delayedCyclesNonPFHistogram.add(delayCycles);
}
void
Profiler::profilePFWait(Time waitTime)
{

View File

@@ -152,8 +152,6 @@ class Profiler : public SimObject
void sequencerRequests(int num) { m_sequencer_requests.add(num); }
void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
void print(std::ostream& out) const;
void rubyWatch(int proc);
@@ -172,6 +170,7 @@ class Profiler : public SimObject
private:
void printRequestProfile(std::ostream &out);
void printDelayProfile(std::ostream &out);
private:
// Private copy constructor and assignment operator
@@ -226,10 +225,6 @@ class Profiler : public SimObject
std::vector<Histogram> m_SWPrefetchLatencyHistograms;
std::vector<Histogram> m_SWPrefetchMachLatencyHistograms;
Histogram m_delayedCyclesHistogram;
Histogram m_delayedCyclesNonPFHistogram;
std::vector<Histogram> m_delayedCyclesVCHistograms;
Histogram m_outstanding_requests;
Histogram m_outstanding_persistent_requests;

View File

@@ -52,6 +52,14 @@ AbstractController::clearStats()
{
m_requestProfileMap.clear();
m_request_count = 0;
m_delayHistogram.clear();
uint32_t size = Network::getNumberOfVirtualNetworks();
m_delayVCHistogram.resize(size);
for (uint32_t i = 0; i < size; i++) {
m_delayVCHistogram[i].clear();
}
}
void
@@ -63,3 +71,11 @@ AbstractController::profileRequest(const std::string &request)
// default value which is 0
m_requestProfileMap[request]++;
}
void
AbstractController::profileMsgDelay(uint32_t virtualNetwork, Time delay)
{
assert(virtualNetwork < m_delayVCHistogram.size());
m_delayHistogram.add(delay);
m_delayVCHistogram[virtualNetwork].add(delay);
}

View File

@@ -36,6 +36,7 @@
#include "mem/ruby/common/Address.hh"
#include "mem/ruby/common/Consumer.hh"
#include "mem/ruby/common/DataBlock.hh"
#include "mem/ruby/common/Histogram.hh"
#include "mem/ruby/network/Network.hh"
#include "mem/ruby/recorder/CacheRecorder.hh"
#include "mem/ruby/system/MachineID.hh"
@@ -92,9 +93,15 @@ class AbstractController : public ClockedObject, public Consumer
const std::map<std::string, uint64_t>& getRequestProfileMap() const
{ return m_requestProfileMap; }
Histogram& getDelayHist() { return m_delayHistogram; }
Histogram& getDelayVCHist(uint32_t index)
{ return m_delayVCHistogram[index]; }
protected:
//! Profiles original cache requests including PUTs
void profileRequest(const std::string &request);
//! Profiles the delay associated with messages.
void profileMsgDelay(uint32_t virtualNetwork, Time delay);
protected:
int m_transitions_per_cycle;
@@ -121,6 +128,11 @@ class AbstractController : public ClockedObject, public Consumer
//! call requisite function for updating the count.
std::map<std::string, uint64_t> m_requestProfileMap;
uint64_t m_request_count;
//! Histogram for profiling delay for the messages this controller
//! cares for
Histogram m_delayHistogram;
std::vector<Histogram> m_delayVCHistogram;
};
#endif // __MEM_RUBY_SLICC_INTERFACE_ABSTRACTCONTROLLER_HH__

View File

@@ -55,12 +55,6 @@ profile_sharing(const Address& addr, AccessType type, NodeID requestor,
profileSharing(addr, type, requestor, sharers, owner);
}
// Free-function wrapper: forwards a message delay sample for the given
// virtual network to the profiler obtained from g_system_ptr.
void
profileMsgDelay(uint32_t virtualNetwork, Time delayCycles)
{
g_system_ptr->getProfiler()->profileMsgDelay(virtualNetwork, delayCycles);
}
void
profileGetX(const Address& datablock, const Address& PC, const Set& owner,
const Set& sharers, NodeID requestor)

View File

@@ -51,7 +51,6 @@ void profile_token_retry(const Address& addr, AccessType type, int count);
void profile_filter_action(int action);
void profile_persistent_prediction(const Address& addr, AccessType type);
void profile_average_latency_estimate(int latency);
void profileMsgDelay(uint32_t virtualNetwork, Time delayCycles);
void profile_multicast_retry(const Address& addr, int count);
void profileGetX(const Address& datablock, const Address& PC, const Set& owner,