base,tests: Create unit tests for Stats::Stor

Create unit tests for the stats storage types. As a side effect
storage-related classes have been moved to separate files.

HistStor's grow_up, grow_out, and grow_convert have been made
private and renamed to comply with gem5's naming convention
and make grow_convert match its grow_up counterpart (growDown)
which is more suitable for its expected behavior.

The params declarations have been moved to be close to their
storage class' constructor.

HistStor has an explicit condition stating that there must be
at least 2 buckets.

Added documentation!

Fixed grow_convert so that it yields consistent histograms.
Previously buckets could not fully intersect, so doubling their
bucket size would make them steal contents inappropriately. For
example, the neighbors [-6,-2[, [-2,2[, [2,6[, when doubled,
become [-12,-4[, [-4,4[, [4,12[; however, since the individual
values are not stored, it is impossible to know how to populate
the middle bucket with its neighbor's partial contents.
This fix forces the middle bucket of a storage to have its lower
bound at 0, solving the partial intersection issue.

Change-Id: Idb063e3dbda3cce3a8969e347660143162146eb9
Signed-off-by: Daniel R. Carvalho <odanrc@yahoo.com.br>
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/25425
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Daniel R. Carvalho
2020-02-05 22:12:38 +01:00
committed by Daniel Carvalho
parent e59557af50
commit c9e069a2f2
6 changed files with 2302 additions and 808 deletions

View File

@@ -131,114 +131,6 @@ InfoAccess::info() const
}
}
// Out-of-line definition of the StorageParams destructor; it has no work of
// its own to do.
StorageParams::~StorageParams() = default;
/**
 * Double the bucket size, merging neighbouring buckets pairwise towards
 * index size / 2 from both sides. The buckets left without merged content at
 * either end of the vector are cleared, and min_bucket, max_bucket and
 * bucket_size are all doubled.
 */
void
HistStor::grow_out()
{
int size = cvec.size();
// First index of the upper half; the lower half ends at zero - 1
int zero = size / 2; // round down!
// One past the last upper-half index that holds merged data afterwards
int top_half = zero + (size - zero + 1) / 2; // round up!
// First lower-half index that holds merged data afterwards
int bottom_half = (size - zero) / 2; // round down!
// grow down
// Walk the lower half from the middle outwards; each destination bucket
// receives the sum of the pair (low_pair, low_pair - 1)
int low_pair = zero - 1;
for (int i = zero - 1; i >= bottom_half; i--) {
cvec[i] = cvec[low_pair];
// The lowest pair may be incomplete
if (low_pair - 1 >= 0)
cvec[i] += cvec[low_pair - 1];
low_pair -= 2;
}
// Sanity check: every lower-half bucket must have been consumed
assert(low_pair == 0 || low_pair == -1 || low_pair == -2);
// The outermost lower buckets now cover a range nothing was sampled in
for (int i = bottom_half - 1; i >= 0; i--)
cvec[i] = Counter();
// grow up
// Mirror of the above: each destination bucket receives the sum of the
// pair (high_pair, high_pair + 1)
int high_pair = zero;
for (int i = zero; i < top_half; i++) {
cvec[i] = cvec[high_pair];
// The highest pair may be incomplete
if (high_pair + 1 < size)
cvec[i] += cvec[high_pair + 1];
high_pair += 2;
}
// Sanity check: every upper-half bucket must have been consumed
assert(high_pair == size || high_pair == size + 1);
// The outermost upper buckets now cover a range nothing was sampled in
for (int i = top_half; i < size; i++)
cvec[i] = Counter();
// Update the tracked range to reflect the doubled bucket size
max_bucket *= 2;
min_bucket *= 2;
bucket_size *= 2;
}
/**
 * Convert a histogram whose lowest bound is zero into one that also covers
 * negative values: the bucket size is doubled, the existing contents are
 * merged pairwise into the upper buckets, and the lower buckets are cleared.
 *
 * Zero is always kept as the lower bound of the bucket at index size / 2.
 * Without that guarantee the doubled buckets would only partially overlap
 * the old ones, and since individual samples are not stored there would be
 * no way to split a bucket's count between its new neighbours.
 */
void
HistStor::grow_convert()
{
    const int size = cvec.size();
    const int zero = size / 2; // round down!
    // True when the last bucket has an even index, i.e., when the number of
    // buckets is odd
    const bool last_index_even = ((size - 1) % 2) == 0;

    // With an odd number of buckets the last bucket keeps its lower bound,
    // so it keeps its contents and the pairwise merge starts one bucket
    // below it
    int pair = size - 1;
    if (last_index_even)
        pair--;

    // Merge the pairs (pair, pair - 1) into the buckets at and above zero
    for (int i = pair; i >= zero; --i) {
        cvec[i] = cvec[pair];
        if (pair - 1 >= 0)
            cvec[i] += cvec[pair - 1];
        pair -= 2;
    }

    // Nothing has been sampled in the new negative range yet
    for (int i = zero - 1; i >= 0; i--)
        cvec[i] = Counter();

    // The lower bound of the first bucket becomes the negative of the lower
    // bound of the last bucket
    min_bucket = -max_bucket;

    // With an even number of buckets the whole range must be shifted down
    // by one (old) bucket so that zero stays on a bucket boundary
    if (!last_index_even) {
        min_bucket -= bucket_size;
        max_bucket -= bucket_size;
    }

    // The bounds above rely on the old bucket size, so update it last
    bucket_size *= 2;
}
void
HistStor::grow_up()
{
int size = cvec.size();
int half = (size + 1) / 2; // round up!
int pair = 0;
for (int i = 0; i < half; i++) {
cvec[i] = cvec[pair];
if (pair + 1 < size)
cvec[i] += cvec[pair + 1];
pair += 2;
}
assert(pair == size || pair == size + 1);
for (int i = half; i < size; i++)
cvec[i] = Counter();
max_bucket *= 2;
bucket_size *= 2;
}
/**
 * Accumulate the contents of another histogram into this one.
 *
 * Both histograms must have the same number of buckets and the same lower
 * bound. If their bucket sizes differ, the one with the smaller buckets is
 * grown with grow_up() until they match; note that this may modify @p hs.
 *
 * @param hs The histogram whose contents are added to this one.
 */
void
HistStor::add(HistStor *hs)
{
    // Keep the bucket count in the unsigned type returned by size() so that
    // the comparisons below do not mix signed and unsigned operands
    const auto b_size = hs->size();
    assert(size() == b_size);
    assert(min_bucket == hs->min_bucket);

    sum += hs->sum;
    logs += hs->logs;
    squares += hs->squares;
    samples += hs->samples;

    // Bring both histograms to the same bucket size before merging buckets
    while (bucket_size > hs->bucket_size)
        hs->grow_up();
    while (bucket_size < hs->bucket_size)
        grow_up();

    for (uint32_t i = 0; i < b_size; i++)
        cvec[i] += hs->cvec[i];
}
Formula::Formula(Group *parent, const char *name, const char *desc)
: DataWrapVec<Formula, FormulaInfoProxy>(parent, name, desc)

View File

@@ -75,14 +75,13 @@
#include "base/stats/group.hh"
#include "base/stats/info.hh"
#include "base/stats/output.hh"
#include "base/stats/storage.hh"
#include "base/stats/types.hh"
#include "base/cast.hh"
#include "base/cprintf.hh"
#include "base/intmath.hh"
#include "base/str.hh"
#include "base/types.hh"
// For curTick().
#include "sim/core.hh"
/* A namespace for all of the Statistics */
namespace Stats {
@@ -172,11 +171,6 @@ class Vector2dInfoProxy : public InfoProxy<Stat, Vector2dInfo>
Result total() const { return this->s.total(); }
};
/**
 * Common base of the parameter structs used by the storage classes. It
 * carries no data; storages retrieve their own derived Params type from it
 * with a cast.
 */
struct StorageParams
{
// Virtual so that derived parameter structs can be handled through a
// pointer to this base. Only declared here; the definition is not part of
// this struct.
virtual ~StorageParams();
};
class InfoAccess
{
private:
@@ -482,167 +476,6 @@ class DataWrapVec2d : public DataWrapVec<Derived, InfoProxyType>
//
//////////////////////////////////////////////////////////////////////
/**
 * Storage backing a simple scalar stat: a single Counter that can be set,
 * incremented, decremented, read back and reset.
 */
class StatStor
{
  private:
    /** Value currently held by the stat. */
    Counter data;

  public:
    /** This storage type has no parameters of its own. */
    struct Params : public StorageParams {};

    /**
     * Create the storage holding a default-constructed Counter.
     * @param info Not used by this storage type.
     */
    StatStor(Info *info)
        : data(Counter())
    { }

    /**
     * Overwrite the stat with the given value.
     * @param val The new value.
     */
    void
    set(Counter val)
    {
        data = val;
    }

    /**
     * Add the given amount to the stat.
     * @param val The amount to add.
     */
    void
    inc(Counter val)
    {
        data += val;
    }

    /**
     * Subtract the given amount from the stat.
     * @param val The amount to subtract.
     */
    void
    dec(Counter val)
    {
        data -= val;
    }

    /**
     * @return The stored value, as a Counter.
     */
    Counter
    value() const
    {
        return data;
    }

    /**
     * @return The stored value, converted to a Result.
     */
    Result
    result() const
    {
        return static_cast<Result>(data);
    }

    /**
     * Hook called before dumping or serialization; nothing to do here.
     */
    void prepare(Info *info) { }

    /**
     * Put the stat back to a default-constructed Counter.
     */
    void
    reset(Info *info)
    {
        data = Counter();
    }

    /**
     * @return True if the stored value equals a default-constructed Counter.
     */
    bool
    zero() const
    {
        return data == Counter();
    }
};
/**
 * Storage for a per-tick average stat. It holds the current count plus a
 * running total of (count * ticks) that is brought up to date every time the
 * count changes, so the average over time can be computed cheaply.
 */
class AvgStor
{
  private:
    /** The count as of the last update. */
    Counter current;
    /** Tick at which the storage was last reset. */
    Tick lastReset;
    /** Accumulated (count * ticks) up to the tick stored in last. */
    mutable Result total;
    /** Tick at which total was last brought up to date. */
    mutable Tick last;

  public:
    /** This storage type has no parameters of its own. */
    struct Params : public StorageParams {};

    /**
     * Create the storage with every field zeroed.
     * @param info Not used by this storage type.
     */
    AvgStor(Info *info)
        : current(0), lastReset(0), total(0), last(0)
    { }

    /**
     * Replace the current count. The old count is first credited to the
     * total for the ticks elapsed since the last update.
     * @param val The new count.
     */
    void
    set(Counter val)
    {
        const auto elapsed = curTick() - last;
        total += current * elapsed;
        last = curTick();
        current = val;
    }

    /**
     * Raise the current count; implemented through set().
     * @param val The amount to increment.
     */
    void
    inc(Counter val)
    {
        set(current + val);
    }

    /**
     * Lower the current count; implemented through set().
     * @param val The amount to decrement.
     */
    void
    dec(Counter val)
    {
        set(current - val);
    }

    /**
     * @return The current count.
     */
    Counter
    value() const
    {
        return current;
    }

    /**
     * Compute the average since the last reset. The total must already be
     * up to date for the current tick (see prepare()).
     * @return The current average.
     */
    Result
    result() const
    {
        assert(last == curTick());
        const Result numerator = static_cast<Result>(total + current);
        const Result denominator =
            static_cast<Result>(curTick() - lastReset + 1);
        return numerator / denominator;
    }

    /**
     * @return True if the accumulated total is zero.
     */
    bool
    zero() const
    {
        return total == 0.0;
    }

    /**
     * Bring the total up to date for the current tick, ahead of dumping or
     * serialization.
     */
    void
    prepare(Info *info)
    {
        const auto elapsed = curTick() - last;
        total += current * elapsed;
        last = curTick();
    }

    /**
     * Clear the accumulated total and restart the averaging window at the
     * current tick. The current count is left untouched.
     */
    void
    reset(Info *info)
    {
        total = 0.0;
        lastReset = curTick();
        last = curTick();
    }
};
/**
* Implementation of a scalar stat. The type of stat is determined by the
* Storage template.
@@ -1406,462 +1239,6 @@ class Vector2dBase : public DataWrapVec2d<Derived, Vector2dInfoProxy>
// Non formula statistics
//
//////////////////////////////////////////////////////////////////////
/**
 * The parameters for a distribution stat. This is the common base of the
 * Params structs of the distribution-like storages, which pass their own
 * DistType to it on construction.
 */
struct DistParams : public StorageParams
{
/** The kind of distribution; fixed at construction. */
const DistType type;
/**
 * @param t The kind of distribution these parameters describe.
 */
DistParams(DistType t) : type(t) {}
};
/**
* Templatized storage and interface for a distribution stat.
*/
class DistStor
{
public:
/** The parameters for a distribution stat. */
struct Params : public DistParams
{
/** The minimum value to track. */
Counter min;
/** The maximum value to track. */
Counter max;
/** The number of entries in each bucket. */
Counter bucket_size;
/** The number of buckets. Equal to (max-min)/bucket_size. */
size_type buckets;
Params() : DistParams(Dist), min(0), max(0), bucket_size(0),
buckets(0) {}
};
private:
/** The minimum value to track. */
Counter min_track;
/** The maximum value to track. */
Counter max_track;
/** The number of entries in each bucket. */
Counter bucket_size;
/** The smallest value sampled. */
Counter min_val;
/** The largest value sampled. */
Counter max_val;
/** The number of values sampled less than min. */
Counter underflow;
/** The number of values sampled more than max. */
Counter overflow;
/** The current sum. */
Counter sum;
/** The sum of squares. */
Counter squares;
/** The number of samples. */
Counter samples;
/** Counter for each bucket. */
VCounter cvec;
public:
DistStor(Info *info)
: cvec(safe_cast<const Params *>(info->storageParams)->buckets)
{
reset(info);
}
/**
* Add a value to the distribution for the given number of times.
* @param val The value to add.
* @param number The number of times to add the value.
*/
void
sample(Counter val, int number)
{
if (val < min_track)
underflow += number;
else if (val > max_track)
overflow += number;
else {
size_type index =
(size_type)std::floor((val - min_track) / bucket_size);
assert(index < size());
cvec[index] += number;
}
if (val < min_val)
min_val = val;
if (val > max_val)
max_val = val;
sum += val * number;
squares += val * val * number;
samples += number;
}
/**
* Return the number of buckets in this distribution.
* @return the number of buckets.
*/
size_type size() const { return cvec.size(); }
/**
* Returns true if any calls to sample have been made.
* @return True if any values have been sampled.
*/
bool
zero() const
{
return samples == Counter();
}
void
prepare(Info *info, DistData &data)
{
const Params *params = safe_cast<const Params *>(info->storageParams);
assert(params->type == Dist);
data.type = params->type;
data.min = params->min;
data.max = params->max;
data.bucket_size = params->bucket_size;
data.min_val = (min_val == CounterLimits::max()) ? 0 : min_val;
data.max_val = (max_val == CounterLimits::min()) ? 0 : max_val;
data.underflow = underflow;
data.overflow = overflow;
data.cvec.resize(params->buckets);
for (off_type i = 0; i < params->buckets; ++i)
data.cvec[i] = cvec[i];
data.sum = sum;
data.squares = squares;
data.samples = samples;
}
/**
* Reset stat value to default
*/
void
reset(Info *info)
{
const Params *params = safe_cast<const Params *>(info->storageParams);
min_track = params->min;
max_track = params->max;
bucket_size = params->bucket_size;
min_val = CounterLimits::max();
max_val = CounterLimits::min();
underflow = Counter();
overflow = Counter();
size_type size = cvec.size();
for (off_type i = 0; i < size; ++i)
cvec[i] = Counter();
sum = Counter();
squares = Counter();
samples = Counter();
}
};
/**
 * Storage for a histogram stat. The number of buckets is fixed, but the
 * bucket size is grown whenever a sample falls outside of the range that is
 * currently covered.
 */
class HistStor
{
  public:
    /** Configuration of a histogram stat. */
    struct Params : public DistParams
    {
        /** Number of buckets allocated by the storage. */
        size_type buckets;

        Params()
            : DistParams(Hist), buckets(0)
        {}
    };

  private:
    /** Lower bound of the first bucket. */
    Counter min_bucket;
    /** Lower bound of the last bucket. */
    Counter max_bucket;
    /** Width of each bucket. */
    Counter bucket_size;
    /** Running sum of the samples. */
    Counter sum;
    /** Running sum of the logarithm of each sample. */
    Counter logs;
    /** Running sum of the squared samples. */
    Counter squares;
    /** Number of samples taken. */
    Counter samples;
    /** Per-bucket sample count. */
    VCounter cvec;

  public:
    /**
     * Allocate the buckets requested by the storage params of the given
     * info, then reset every counter.
     */
    HistStor(Info *info)
        : cvec(safe_cast<const Params *>(info->storageParams)->buckets)
    {
        reset(info);
    }

    void grow_up();
    void grow_out();
    void grow_convert();
    void add(HistStor *);

    /**
     * Record a value a given number of times, growing the bucket size first
     * if the value does not fit in the current range.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void
    sample(Counter val, int number)
    {
        assert(min_bucket < max_bucket);

        if (val < min_bucket) {
            // A histogram starting at zero must first be converted to one
            // that covers negative values
            if (min_bucket == 0)
                grow_convert();
            while (val < min_bucket)
                grow_out();
        } else if (val >= max_bucket + bucket_size) {
            // Histograms starting at zero only grow upwards; the others
            // grow in both directions
            const bool starts_at_zero = (min_bucket == 0);
            while (val >= max_bucket + bucket_size) {
                if (starts_at_zero)
                    grow_up();
                else
                    grow_out();
            }
        }

        const size_type index = static_cast<int64_t>(
            std::floor((val - min_bucket) / bucket_size));
        assert(index < size());
        cvec[index] += number;

        sum += val * number;
        squares += val * val * number;
        logs += log(val) * number;
        samples += number;
    }

    /**
     * @return The number of buckets in this histogram.
     */
    size_type
    size() const
    {
        return cvec.size();
    }

    /**
     * @return True if no samples have been recorded.
     */
    bool
    zero() const
    {
        return samples == Counter();
    }

    /**
     * Copy the current state into the given DistData.
     */
    void
    prepare(Info *info, DistData &data)
    {
        const Params *params = safe_cast<const Params *>(info->storageParams);
        assert(params->type == Hist);

        data.type = params->type;
        data.min = min_bucket;
        data.max = max_bucket + bucket_size - 1;
        data.bucket_size = bucket_size;
        data.min_val = min_bucket;
        data.max_val = max_bucket;

        int num_buckets = params->buckets;
        data.cvec.resize(num_buckets);
        for (off_type i = 0; i < num_buckets; ++i)
            data.cvec[i] = cvec[i];

        data.sum = sum;
        data.logs = logs;
        data.squares = squares;
        data.samples = samples;
    }

    /**
     * Restore the initial range (bucket size 1, starting at zero) and clear
     * every counter.
     */
    void
    reset(Info *info)
    {
        const Params *params = safe_cast<const Params *>(info->storageParams);
        min_bucket = 0;
        max_bucket = params->buckets - 1;
        bucket_size = 1;

        const size_type num_buckets = cvec.size();
        for (off_type i = 0; i < num_buckets; ++i)
            cvec[i] = Counter();

        sum = Counter();
        squares = Counter();
        samples = Counter();
        logs = Counter();
    }
};
/**
 * Storage for a distribution that only keeps what is needed to compute the
 * mean and variance: the sum, the sum of squares and the sample count.
 */
class SampleStor
{
  public:
    /** Configuration of a sample stat; only carries the Deviation type. */
    struct Params : public DistParams
    {
        Params()
            : DistParams(Deviation)
        {}
    };

  private:
    /** Running sum of the samples. */
    Counter sum;
    /** Running sum of the squared samples. */
    Counter squares;
    /** Number of samples taken. */
    Counter samples;

  public:
    /**
     * Create the storage with every counter cleared.
     * @param info Not used by this storage type.
     */
    SampleStor(Info *info)
        : sum(Counter()), squares(Counter()), samples(Counter())
    { }

    /**
     * Record a value a given number of times, updating the sum, the sum of
     * squares and the sample count.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void
    sample(Counter val, int number)
    {
        samples += number;
        sum += val * number;
        squares += val * val * number;
    }

    /**
     * @return Always 1; this storage has a single entry.
     */
    size_type
    size() const
    {
        return 1;
    }

    /**
     * @return True if no samples have been added.
     */
    bool
    zero() const
    {
        return samples == Counter();
    }

    /**
     * Copy the current state into the given DistData.
     */
    void
    prepare(Info *info, DistData &data)
    {
        const Params *params = safe_cast<const Params *>(info->storageParams);
        assert(params->type == Deviation);

        data.type = params->type;
        data.sum = sum;
        data.squares = squares;
        data.samples = samples;
    }

    /**
     * Clear every counter.
     */
    void
    reset(Info *info)
    {
        samples = Counter();
        squares = Counter();
        sum = Counter();
    }
};
/**
* Templatized storage for distribution that calculates per tick mean and
* variance.
*/
class AvgSampleStor
{
public:
struct Params : public DistParams
{
Params() : DistParams(Deviation) {}
};
private:
/** Current total. */
Counter sum;
/** Current sum of squares. */
Counter squares;
public:
/**
* Create and initialize this storage.
*/
AvgSampleStor(Info *info)
: sum(Counter()), squares(Counter())
{}
/**
* Add a value to the distribution for the given number of times.
* Update the running sum and sum of squares.
* @param val The value to add.
* @param number The number of times to add the value.
*/
void
sample(Counter val, int number)
{
sum += val * number;
squares += val * val * number;
}
/**
* Return the number of entries, in this case 1.
* @return 1.
*/
size_type size() const { return 1; }
/**
* Return true if no samples have been added.
* @return True if the sum is zero.
*/
bool zero() const { return sum == Counter(); }
void
prepare(Info *info, DistData &data)
{
const Params *params = safe_cast<const Params *>(info->storageParams);
assert(params->type == Deviation);
data.type = params->type;
data.sum = sum;
data.squares = squares;
data.samples = curTick();
}
/**
* Reset stat value to default
*/
void
reset(Info *info)
{
sum = Counter();
squares = Counter();
}
};
/**
* Implementation of a distribution stat. The type of distribution is
@@ -1953,7 +1330,6 @@ class DistBase : public DataWrap<Derived, DistInfoProxy>
* Add the argument distribution to the this distribution.
*/
void add(DistBase &d) { data()->add(d.data()); }
};
template <class Stat>
@@ -2928,81 +2304,6 @@ class SparseHistBase : public DataWrap<Derived, SparseHistInfoProxy>
}
};
/**
 * Storage for a sparse histogram stat: instead of a fixed set of buckets,
 * a counter is kept for every distinct value that has been sampled.
 */
class SparseHistStor
{
  public:
    /** Configuration of a sparse histogram; only carries the Hist type. */
    struct Params : public DistParams
    {
        Params()
            : DistParams(Hist)
        {}
    };

  private:
    /** Number of samples taken. */
    Counter samples;
    /** Sample count of each distinct value. */
    MCounter cmap;

  public:
    /**
     * Create an empty storage.
     */
    SparseHistStor(Info *info)
    {
        reset(info);
    }

    /**
     * Record a value a given number of times.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void
    sample(Counter val, int number)
    {
        samples += number;
        cmap[val] += number;
    }

    /**
     * @return The number of distinct values currently held in the map.
     */
    size_type
    size() const
    {
        return cmap.size();
    }

    /**
     * @return True if no samples have been recorded.
     */
    bool
    zero() const
    {
        return samples == Counter();
    }

    /**
     * Replace the contents of the given SparseHistData with a copy of the
     * current state.
     */
    void
    prepare(Info *info, SparseHistData &data)
    {
        data.cmap.clear();
        for (MCounter::iterator it = cmap.begin(); it != cmap.end(); ++it)
            data.cmap[it->first] = it->second;

        data.samples = samples;
    }

    /**
     * Drop every entry and clear the sample count.
     */
    void
    reset(Info *info)
    {
        samples = 0;
        cmap.clear();
    }
};
class SparseHistogram : public SparseHistBase<SparseHistogram, SparseHistStor>
{
public:

View File

@@ -30,6 +30,7 @@ Import('*')
Source('group.cc')
Source('info.cc')
Source('storage.cc')
Source('text.cc')
if env['USE_HDF5']:
@@ -38,3 +39,5 @@ if env['USE_HDF5']:
else:
Source('hdf5.cc')
GTest('storage.test', 'storage.test.cc', '../debug.cc', '../str.cc', 'info.cc',
'storage.cc', '../../sim/cur_tick.cc')

228
src/base/stats/storage.cc Normal file
View File

@@ -0,0 +1,228 @@
/*
* Copyright (c) 2021 Daniel R. Carvalho
* Copyright (c) 2019 Arm Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2003-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "base/stats/storage.hh"
#include <cmath>
namespace Stats {
/**
 * Record a value a given number of times. Values inside the tracked range
 * are counted in their bucket; the others only bump the underflow or
 * overflow counter. Every value contributes to the overall statistics.
 */
void
DistStor::sample(Counter val, int number)
{
    assert(bucket_size > 0);

    if (val < min_track) {
        underflow += number;
    } else if (val > max_track) {
        overflow += number;
    } else {
        const size_type index = static_cast<size_type>(
            std::floor((val - min_track) / bucket_size));
        assert(index < size());
        cvec[index] += number;
    }

    // Track the extremes regardless of whether the value was bucketed
    if (val < min_val)
        min_val = val;
    if (val > max_val)
        max_val = val;

    samples += number;
    sum += val * number;
    squares += val * val * number;
}
/**
 * Double the bucket size, merging neighbouring buckets pairwise towards
 * index size / 2 from both sides. The buckets left without merged content at
 * either end of the vector are cleared, and min_bucket, max_bucket and
 * bucket_size are all doubled.
 */
void
HistStor::growOut()
{
int size = cvec.size();
// First index of the upper half; the lower half ends at zero - 1
int zero = size / 2; // round down!
// One past the last upper-half index that holds merged data afterwards
int top_half = zero + (size - zero + 1) / 2; // round up!
// First lower-half index that holds merged data afterwards
int bottom_half = (size - zero) / 2; // round down!
// grow down
// Walk the lower half from the middle outwards; each destination bucket
// receives the sum of the pair (low_pair, low_pair - 1)
int low_pair = zero - 1;
for (int i = zero - 1; i >= bottom_half; i--) {
cvec[i] = cvec[low_pair];
// The lowest pair may be incomplete
if (low_pair - 1 >= 0)
cvec[i] += cvec[low_pair - 1];
low_pair -= 2;
}
// Sanity check: every lower-half bucket must have been consumed
assert(low_pair == 0 || low_pair == -1 || low_pair == -2);
// The outermost lower buckets now cover a range nothing was sampled in
for (int i = bottom_half - 1; i >= 0; i--)
cvec[i] = Counter();
// grow up
// Mirror of the above: each destination bucket receives the sum of the
// pair (high_pair, high_pair + 1)
int high_pair = zero;
for (int i = zero; i < top_half; i++) {
cvec[i] = cvec[high_pair];
// The highest pair may be incomplete
if (high_pair + 1 < size)
cvec[i] += cvec[high_pair + 1];
high_pair += 2;
}
// Sanity check: every upper-half bucket must have been consumed
assert(high_pair == size || high_pair == size + 1);
// The outermost upper buckets now cover a range nothing was sampled in
for (int i = top_half; i < size; i++)
cvec[i] = Counter();
// Update the tracked range to reflect the doubled bucket size
max_bucket *= 2;
min_bucket *= 2;
bucket_size *= 2;
}
void
HistStor::growDown()
{
const int size = cvec.size();
const int zero = size / 2; // round down!
const bool even = ((size - 1) % 2) == 0;
// Make sure that zero becomes the lower bound of the middle bucket. On
// an even number of buckets the last bucket does not change its lower
// bound, therefore it does not need to absorb any other bucket
int pair = size - 1;
if (even) {
pair--;
}
for (int i = pair; i >= zero; --i) {
cvec[i] = cvec[pair];
if (pair - 1 >= 0)
cvec[i] += cvec[pair - 1];
pair -= 2;
}
for (int i = zero - 1; i >= 0; i--)
cvec[i] = Counter();
// Double the range by using the negative of the lower bound of the last
// bucket as the new lower bound of the first bucket
min_bucket = -max_bucket;
// A special case must be handled when there is an odd number of
// buckets so that zero is kept as the lower bound of the middle bucket
if (!even) {
min_bucket -= bucket_size;
max_bucket -= bucket_size;
}
// Only update the bucket size once the range has been updated
bucket_size *= 2;
}
void
HistStor::growUp()
{
int size = cvec.size();
int half = (size + 1) / 2; // round up!
int pair = 0;
for (int i = 0; i < half; i++) {
cvec[i] = cvec[pair];
if (pair + 1 < size)
cvec[i] += cvec[pair + 1];
pair += 2;
}
assert(pair == size || pair == size + 1);
for (int i = half; i < size; i++)
cvec[i] = Counter();
max_bucket *= 2;
bucket_size *= 2;
}
/**
 * Record a value a given number of times, growing the bucket size first if
 * the value does not fit in the range currently covered by the buckets.
 */
void
HistStor::sample(Counter val, int number)
{
    assert(min_bucket < max_bucket);

    if (val < min_bucket) {
        // A histogram starting at zero must first be converted to one that
        // covers negative values
        if (min_bucket == 0)
            growDown();
        while (val < min_bucket)
            growOut();
    } else if (val >= max_bucket + bucket_size) {
        // Histograms starting at zero only grow upwards; the others grow in
        // both directions
        const bool starts_at_zero = (min_bucket == 0);
        while (val >= max_bucket + bucket_size) {
            if (starts_at_zero)
                growUp();
            else
                growOut();
        }
    }

    assert(bucket_size > 0);
    const size_type index = static_cast<int64_t>(
        std::floor((val - min_bucket) / bucket_size));
    assert(index < size());
    cvec[index] += number;

    sum += val * number;
    squares += val * val * number;
    logs += std::log(val) * number;
    samples += number;
}
/**
 * Accumulate the contents of another histogram into this one.
 *
 * Both histograms must have the same number of buckets and the same lower
 * bound. If their bucket sizes differ, the one with the smaller buckets is
 * grown with growUp() until they match; note that this may modify @p hs.
 *
 * @param hs The histogram whose contents are added to this one.
 */
void
HistStor::add(HistStor *hs)
{
    // Keep the bucket count in the unsigned type returned by size() so that
    // the comparisons below do not mix signed and unsigned operands
    const auto b_size = hs->size();
    assert(size() == b_size);
    assert(min_bucket == hs->min_bucket);

    sum += hs->sum;
    logs += hs->logs;
    squares += hs->squares;
    samples += hs->samples;

    // Bring both histograms to the same bucket size before merging buckets
    while (bucket_size > hs->bucket_size)
        hs->growUp();
    while (bucket_size < hs->bucket_size)
        growUp();

    for (uint32_t i = 0; i < b_size; i++)
        cvec[i] += hs->cvec[i];
}
} // namespace Stats

767
src/base/stats/storage.hh Normal file
View File

@@ -0,0 +1,767 @@
/*
* Copyright (c) 2021 Daniel R. Carvalho
* Copyright (c) 2003-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __BASE_STATS_STORAGE_HH__
#define __BASE_STATS_STORAGE_HH__
#include <cassert>
#include "base/cast.hh"
#include "base/logging.hh"
#include "base/stats/info.hh"
#include "base/stats/types.hh"
// For curTick().
#include "sim/core.hh"
namespace Stats {
/**
 * Common base of the parameter structs used by the storage classes. It
 * carries no data; storages retrieve their own derived Params type from it
 * with a cast.
 */
struct StorageParams
{
// Virtual so that derived parameter structs can be handled through a
// pointer to this base
virtual ~StorageParams() = default;
};
/**
 * Storage backing a simple scalar stat: a single Counter that can be set,
 * incremented, decremented, read back and reset.
 */
class StatStor
{
  private:
    /** Value currently held by the stat. */
    Counter data;

  public:
    /** This storage type has no parameters of its own. */
    struct Params : public StorageParams {};

    /**
     * Create the storage holding a default-constructed Counter.
     * @param info Not used by this storage type.
     */
    StatStor(Info *info)
        : data(Counter())
    { }

    /**
     * Overwrite the stat with the given value.
     * @param val The new value.
     */
    void
    set(Counter val)
    {
        data = val;
    }

    /**
     * Add the given amount to the stat.
     * @param val The amount to add.
     */
    void
    inc(Counter val)
    {
        data += val;
    }

    /**
     * Subtract the given amount from the stat.
     * @param val The amount to subtract.
     */
    void
    dec(Counter val)
    {
        data -= val;
    }

    /**
     * @return The stored value, as a Counter.
     */
    Counter
    value() const
    {
        return data;
    }

    /**
     * @return The stored value, converted to a Result.
     */
    Result
    result() const
    {
        return static_cast<Result>(data);
    }

    /**
     * Hook called before dumping or serialization; nothing to do here.
     */
    void prepare(Info *info) { }

    /**
     * Put the stat back to a default-constructed Counter.
     */
    void
    reset(Info *info)
    {
        data = Counter();
    }

    /**
     * @return True if the stored value equals a default-constructed Counter.
     */
    bool
    zero() const
    {
        return data == Counter();
    }
};
/**
 * Storage for a per-tick average stat. It holds the current count plus a
 * running total of (count * ticks) that is brought up to date every time the
 * count changes, so the average over time can be computed cheaply.
 */
class AvgStor
{
  private:
    /** The count as of the last update. */
    Counter current;
    /** Tick at which the storage was last reset. */
    Tick lastReset;
    /** Accumulated (count * ticks) up to the tick stored in last. */
    mutable Result total;
    /** Tick at which total was last brought up to date. */
    mutable Tick last;

  public:
    /** This storage type has no parameters of its own. */
    struct Params : public StorageParams {};

    /**
     * Create the storage with every field zeroed.
     * @param info Not used by this storage type.
     */
    AvgStor(Info *info)
        : current(0), lastReset(0), total(0), last(0)
    { }

    /**
     * Replace the current count. The old count is first credited to the
     * total for the ticks elapsed since the last update.
     * @param val The new count.
     */
    void
    set(Counter val)
    {
        const auto elapsed = curTick() - last;
        total += current * elapsed;
        last = curTick();
        current = val;
    }

    /**
     * Raise the current count; implemented through set().
     * @param val The amount to increment.
     */
    void
    inc(Counter val)
    {
        set(current + val);
    }

    /**
     * Lower the current count; implemented through set().
     * @param val The amount to decrement.
     */
    void
    dec(Counter val)
    {
        set(current - val);
    }

    /**
     * @return The current count.
     */
    Counter
    value() const
    {
        return current;
    }

    /**
     * Compute the average since the last reset. The total must already be
     * up to date for the current tick (see prepare()).
     * @return The current average.
     */
    Result
    result() const
    {
        assert(last == curTick());
        const Result numerator = static_cast<Result>(total + current);
        const Result denominator =
            static_cast<Result>(curTick() - lastReset + 1);
        return numerator / denominator;
    }

    /**
     * @return True if the accumulated total is zero.
     */
    bool
    zero() const
    {
        return total == 0.0;
    }

    /**
     * Bring the total up to date for the current tick, ahead of dumping or
     * serialization.
     */
    void
    prepare(Info *info)
    {
        const auto elapsed = curTick() - last;
        total += current * elapsed;
        last = curTick();
    }

    /**
     * Clear the accumulated total and restart the averaging window at the
     * current tick. The current count is left untouched.
     */
    void
    reset(Info *info)
    {
        total = 0.0;
        lastReset = curTick();
        last = curTick();
    }
};
/**
 * The parameters for a distribution stat. This is the common base of the
 * Params structs of the distribution-like storages, which pass their own
 * DistType to it on construction.
 */
struct DistParams : public StorageParams
{
/** The kind of distribution; fixed at construction. */
const DistType type;
/**
 * @param t The kind of distribution these parameters describe.
 */
DistParams(DistType t) : type(t) {}
};
/**
 * Storage for a distribution stat. Buckets keep track of the values within
 * a given range; values outside of it still count towards the overall
 * statistics, but are only registered in the underflow and overflow
 * counters rather than in a bucket.
 */
class DistStor
{
  private:
    /** Smallest value that is placed in a bucket. */
    Counter min_track;
    /** Largest value that is placed in a bucket. */
    Counter max_track;
    /** Width of each bucket. */
    Counter bucket_size;
    /** Smallest value seen so far. */
    Counter min_val;
    /** Largest value seen so far. */
    Counter max_val;
    /** Number of samples below min_track. */
    Counter underflow;
    /** Number of samples above max_track. */
    Counter overflow;
    /** Running sum of the samples. */
    Counter sum;
    /** Running sum of the squared samples. */
    Counter squares;
    /** Number of samples taken. */
    Counter samples;
    /** Per-bucket sample count. */
    VCounter cvec;

  public:
    /** Configuration of a distribution stat. */
    struct Params : public DistParams
    {
        /** Smallest value that is placed in a bucket. */
        Counter min;
        /** Largest value that is placed in a bucket. */
        Counter max;
        /** Width of each bucket. */
        Counter bucket_size;
        /** Number of buckets allocated by the storage. */
        size_type buckets;

        Params()
            : DistParams(Dist), min(0), max(0), bucket_size(0), buckets(0)
        {}
    };

    /**
     * Allocate the buckets requested by the storage params of the given
     * info, then reset every counter.
     */
    DistStor(Info *info)
        : cvec(safe_cast<const Params *>(info->storageParams)->buckets)
    {
        reset(info);
    }

    /**
     * Record a value a given number of times.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void sample(Counter val, int number);

    /**
     * @return The number of buckets in this distribution.
     */
    size_type
    size() const
    {
        return cvec.size();
    }

    /**
     * @return True if no samples have been recorded.
     */
    bool
    zero() const
    {
        return samples == Counter();
    }

    /**
     * Copy the current state into the given DistData. Observed min/max that
     * still hold their reset sentinels are reported as 0.
     */
    void
    prepare(Info *info, DistData &data)
    {
        const Params *params = safe_cast<const Params *>(info->storageParams);
        assert(params->type == Dist);

        data.type = params->type;
        data.min = params->min;
        data.max = params->max;
        data.bucket_size = params->bucket_size;

        if (min_val == CounterLimits::max())
            data.min_val = 0;
        else
            data.min_val = min_val;
        if (max_val == CounterLimits::min())
            data.max_val = 0;
        else
            data.max_val = max_val;

        data.underflow = underflow;
        data.overflow = overflow;

        data.cvec.resize(params->buckets);
        for (off_type i = 0; i < params->buckets; ++i)
            data.cvec[i] = cvec[i];

        data.sum = sum;
        data.squares = squares;
        data.samples = samples;
    }

    /**
     * Reload the tracked range from the storage params and clear every
     * counter. The observed min/max are set to sentinels so that the first
     * sample replaces them.
     */
    void
    reset(Info *info)
    {
        const Params *params = safe_cast<const Params *>(info->storageParams);
        min_track = params->min;
        max_track = params->max;
        bucket_size = params->bucket_size;

        min_val = CounterLimits::max();
        max_val = CounterLimits::min();
        underflow = Counter();
        overflow = Counter();

        const size_type num_buckets = cvec.size();
        for (off_type i = 0; i < num_buckets; ++i)
            cvec[i] = Counter();

        sum = Counter();
        squares = Counter();
        samples = Counter();
    }
};
/**
 * Templatized storage and interface for a histogram stat.
 *
 * The number of buckets is fixed on initialization; however, the bucket size
 * isn't. That means that when samples that are outside the current range are
 * seen, the bucket size will be increased so that each bucket can hold a
 * bigger range of values. When that happens, the buckets' contents are
 * relocated.
 *
 * The min and max bucket values can only be, respectively, decreased and
 * increased when sampling. If this wasn't true, samples that were previously
 * within the bucket range could end up outside the valid range, making
 * the storage's state incoherent. These values are set back to their initial
 * states on reset().
 *
 * The bucket range is always zero-centric. While the storage does not
 * contain negative values the bucket range will keep its lower bound at
 * zero, doubling the upper bound when needed; however, as soon as a negative
 * value is sampled, zero becomes the lower bound of the middle (rounded up)
 * bucket. Although this means that the histogram will not be symmetric if
 * negative values are sampled, it makes it possible to grow the buckets
 * without keeping track of the individual values.
 *
 * This happens because if zero was not a lower or upper bound, when the
 * bucket size was doubled, the lower and upper bounds of the bucket
 * containing zero would intersect with middle values of the previous and
 * next buckets. For example, if the bucket containing zero has range [-2,2[,
 * its neighbor buckets would have ranges [-6,-2[ and [2,6[. When the
 * buckets are grown, the zero bucket would grow its range to [-4,4[, which
 * cannot be easily extracted from the neighbor buckets.
 */
class HistStor
{
private:
/** Lower bound of the first bucket's range. */
Counter min_bucket;
/** Lower bound of the last bucket's range. */
Counter max_bucket;
/** The number of entries in each bucket. */
Counter bucket_size;
/** The current sum. */
Counter sum;
/** The sum of logarithm of each sample, used to compute geometric mean. */
Counter logs;
/** The sum of squares. */
Counter squares;
/** The number of samples. */
Counter samples;
/** Counter for each bucket. */
VCounter cvec;
/**
* Given a bucket size B, and a range of values [0, N], this function
* doubles the bucket size to double the range of values towards the
* positive infinite; that is, double the upper range of this storage
* so that the range becomes [0, 2*N].
*
* Because the bucket size is doubled, the buckets contents are rearranged,
* since the original range of values is mapped to the lower half buckets.
*/
void growUp();
/**
* Given a bucket size B, and a range of values [M, N], where M < 0, this
* function doubles the bucket size to double the range of values towards
* both positive and negative infinites; that is, it doubles both the lower
* and the upper range of this storage so that the range becomes
* [2*M, 2*N].
*
* Because the bucket size is doubled, the buckets contents are
* rearranged, and the original range of values are redistributed to free
* buckets for the newly appended ranges.
*/
void growOut();
/**
* Given a bucket size B, and a range of values [0, N], this function
* doubles the bucket size to double the range of values towards the
* negative infinite; that is, it doubles the lower range of this
* storage so that the middle buckes contaihs zero as a lower bound. As
* such, the storage range becomes [-N, N+B] if there is an odd number
* of buckets, and [-N-B, N+B] if there is an even number of buckets.
*
* Because the bucket size is doubled, the buckets contents are
* rearranged, and the original range of values are redistributed to free
* buckets for the newly appended ranges.
*/
void growDown();
public:
/** The parameters for a distribution stat. */
struct Params : public DistParams
{
/** The number of buckets. */
size_type buckets;
Params() : DistParams(Hist), buckets(0) {}
};
HistStor(Info *info)
: cvec(safe_cast<const Params *>(info->storageParams)->buckets)
{
fatal_if(cvec.size() == 1,
"There must be at least two buckets in a histogram");
reset(info);
}
/**
* Adds the contents of the given storage to this storage.
* @param other The other storage to be added.
*/
void add(HistStor *other);
/**
* Add a value to the distribution for the given number of times.
* @param val The value to add.
* @param number The number of times to add the value.
*/
void sample(Counter val, int number);
/**
* Return the number of buckets in this distribution.
* @return the number of buckets.
*/
size_type size() const { return cvec.size(); }
/**
* Returns true if any calls to sample have been made.
* @return True if any values have been sampled.
*/
bool
zero() const
{
return samples == Counter();
}
void
prepare(Info *info, DistData &data)
{
const Params *params = safe_cast<const Params *>(info->storageParams);
assert(params->type == Hist);
data.type = params->type;
data.min = min_bucket;
data.max = max_bucket + bucket_size - 1;
data.bucket_size = bucket_size;
data.min_val = min_bucket;
data.max_val = max_bucket;
int buckets = params->buckets;
data.cvec.resize(buckets);
for (off_type i = 0; i < buckets; ++i)
data.cvec[i] = cvec[i];
data.sum = sum;
data.logs = logs;
data.squares = squares;
data.samples = samples;
}
/**
* Reset stat value to default
*/
void
reset(Info *info)
{
const Params *params = safe_cast<const Params *>(info->storageParams);
min_bucket = 0;
max_bucket = params->buckets - 1;
bucket_size = 1;
size_type size = cvec.size();
for (off_type i = 0; i < size; ++i)
cvec[i] = Counter();
sum = Counter();
squares = Counter();
samples = Counter();
logs = Counter();
}
};
/**
* Templatized storage and interface for a distribution that calculates mean
* and variance.
*/
class SampleStor
{
  private:
    /** Running sum of the sampled values. */
    Counter sum;
    /** Running sum of the squared sampled values. */
    Counter squares;
    /** How many samples have been added. */
    Counter samples;

  public:
    /** Parameters of this storage; they only carry the Deviation type. */
    struct Params : public DistParams
    {
        Params()
          : DistParams(Deviation)
        {
        }
    };

    /**
     * Create the storage with all of its counters zeroed.
     * @param info Unused.
     */
    SampleStor(Info *info)
      : sum(), squares(), samples()
    {
    }

    /**
     * Account for a value that was seen the given number of times: the
     * sum, the sum of squares and the number of samples are all updated.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void
    sample(Counter val, int number)
    {
        samples += number;
        squares += val * val * number;
        sum += val * number;
    }

    /**
     * Return the number of entries in this stat, which is always 1.
     * @return 1.
     */
    size_type
    size() const
    {
        return 1;
    }

    /**
     * Tell whether the storage is still empty.
     * @return True if no samples have been added.
     */
    bool
    zero() const
    {
        return samples == Counter();
    }

    /**
     * Export the type and the accumulated counters.
     * @param info Stat info whose storageParams hold this class' Params.
     * @param data Destination that receives the copied values.
     */
    void
    prepare(Info *info, DistData &data)
    {
        const Params *cfg = safe_cast<const Params *>(info->storageParams);
        assert(cfg->type == Deviation);

        data.type = cfg->type;
        data.samples = samples;
        data.squares = squares;
        data.sum = sum;
    }

    /**
     * Reset stat value to default
     * @param info Unused.
     */
    void
    reset(Info *info)
    {
        sum = squares = samples = Counter();
    }
};
/**
* Templatized storage for distribution that calculates per tick mean and
* variance.
*/
class AvgSampleStor
{
  private:
    /** Running total of the sampled values. */
    Counter sum;
    /** Running total of the squared sampled values. */
    Counter squares;

  public:
    /** Parameters of this storage; they only carry the Deviation type. */
    struct Params : public DistParams
    {
        Params()
          : DistParams(Deviation)
        {
        }
    };

    /**
     * Create the storage with both of its counters zeroed.
     * @param info Unused.
     */
    AvgSampleStor(Info *info)
      : sum(), squares()
    {
    }

    /**
     * Account for a value that was seen the given number of times by
     * updating the running sum and sum of squares.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void
    sample(Counter val, int number)
    {
        squares += val * val * number;
        sum += val * number;
    }

    /**
     * Return the number of entries, in this case 1.
     * @return 1.
     */
    size_type
    size() const
    {
        return 1;
    }

    /**
     * Tell whether the storage holds nothing. Only the sum is inspected,
     * so samples that add up to zero are reported as empty too.
     * @return True if the sum is zero.
     */
    bool
    zero() const
    {
        return sum == Counter();
    }

    /**
     * Export the type and the accumulated counters. The current tick is
     * exported in place of a sample count.
     * @param info Stat info whose storageParams hold this class' Params.
     * @param data Destination that receives the copied values.
     */
    void
    prepare(Info *info, DistData &data)
    {
        const Params *cfg = safe_cast<const Params *>(info->storageParams);
        assert(cfg->type == Deviation);

        data.type = cfg->type;
        data.sum = sum;
        data.squares = squares;
        data.samples = curTick();
    }

    /**
     * Reset stat value to default
     * @param info Unused.
     */
    void
    reset(Info *info)
    {
        sum = squares = Counter();
    }
};
/**
* Templatized storage and interface for a sparse histogram stat. There
* is no actual limit on the number of buckets, and each of them has a size
* of 1, meaning that samples are individually recorded, and there is no
* need to keep track of the samples that occur in between two distant
* sampled values.
*/
class SparseHistStor
{
  private:
    /** Total number of samples. */
    Counter samples;
    /** Per-value counters, keyed by the sampled value. */
    MCounter cmap;

  public:
    /** The parameters for a sparse histogram stat. */
    struct Params : public DistParams
    {
        Params()
          : DistParams(Hist)
        {
        }
    };

    /**
     * Create the storage in its reset state.
     * @param info Forwarded to reset().
     */
    SparseHistStor(Info *info)
    {
        reset(info);
    }

    /**
     * Add a value to the distribution for the given number of times.
     * @param val The value to add.
     * @param number The number of times to add the value.
     */
    void
    sample(Counter val, int number)
    {
        samples += number;
        cmap[val] += number;
    }

    /**
     * Return the number of buckets in this distribution, that is, the
     * number of entries currently held by the map.
     * @return the number of buckets.
     */
    size_type
    size() const
    {
        return cmap.size();
    }

    /**
     * Tell whether the storage is still empty.
     * @return True if the sample counter is zero, i.e., nothing has been
     *         sampled since the last reset.
     */
    bool
    zero() const
    {
        return samples == Counter();
    }

    /**
     * Export the current contents: the destination map is emptied and
     * then filled with a copy of every entry, along with the sample count.
     * @param info Unused.
     * @param data Destination that receives the copied values.
     */
    void
    prepare(Info *info, SparseHistData &data)
    {
        data.cmap.clear();
        for (const auto &entry : cmap)
            data.cmap[entry.first] = entry.second;

        data.samples = samples;
    }

    /**
     * Reset stat value to default
     * @param info Unused.
     */
    void
    reset(Info *info)
    {
        samples = Counter();
        cmap.clear();
    }
};
} // namespace Stats
#endif // __BASE_STATS_STORAGE_HH__

File diff suppressed because it is too large Load Diff