Files
gem5/src/python/m5/ext/pystats/statistic.py
Bobby R. Bruce 7f0290985f stdlib,tests: Add Pyunit tests to check Pyunit nav, fix bugs
Bugs fixed of note:

1. The 'find' method has been fixed to work. This involved making
   'children' a method implemented per-subclass as required.
2. The 'get_all_stats_of_name' method has been removed. This was not
   working at all correctly and is largely doing what 'find' does.
3. The functionality to get an element in a vector via an attribute call
   (i.e., self.vector1 == self.vector[1]) has been implemented, thus
   maintaining backwards compatibility with the regular Python stats.

Change-Id: I31a4ccc723937018a3038dcdf491c82629ddbbb2
2024-05-30 03:02:06 -07:00

356 lines
10 KiB
Python

# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from abc import ABC
from typing import (
Any,
Callable,
Dict,
Iterable,
List,
Optional,
Union,
)
from .abstract_stat import AbstractStat
from .storagetype import StorageType
class Statistic(ABC, AbstractStat):
    """
    Common abstract base shared by every Python statistic.

    A statistic bundles a payload (``value``) with an optional type label
    and an optional human-readable description.
    """

    value: Any
    type: Optional[str]
    description: Optional[str]

    def __init__(
        self,
        value: Any,
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: The payload stored by this statistic.
        :param type: An optional label naming the kind of statistic.
        :param description: An optional description of the statistic.
        """
        self.value, self.type, self.description = value, type, description

    def __repr__(self):
        """Render the statistic as the string form of its value."""
        rendered = str(self.value)
        return rendered
class Scalar(Statistic):
    """
    A Python statistic holding a single numeric value.
    """

    value: Union[float, int]
    unit: Optional[str]
    datatype: Optional[StorageType]

    def __init__(
        self,
        value: Union[float, int],
        unit: Optional[str] = None,
        description: Optional[str] = None,
        datatype: Optional[StorageType] = None,
    ):
        """
        :param value: The number stored by the scalar.
        :param unit: An optional unit for the value.
        :param description: An optional description of the statistic.
        :param datatype: An optional storage type for the value.
        """
        # The type label of a scalar is always the fixed string "Scalar".
        super().__init__(value, "Scalar", description)
        self.datatype = datatype
        self.unit = unit
class Vector(Statistic):
    """
    A Python statistic representing a vector of Scalar values.

    The entries are stored in a dictionary (``value``) which maps a key
    (a string, an integer, or a float) to the Scalar held at that position.
    """

    value: Dict[Union[str, int, float], Scalar]

    def __init__(
        self,
        value: Dict[Union[str, int, float], Scalar],
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: The mapping of keys to the Scalars of the vector.
        :param type: An optional label naming the kind of statistic.
        :param description: An optional description of the statistic.
        """
        super().__init__(
            value=value,
            type=type,
            description=description,
        )

    def _resolve_key(self, item: Any) -> Any:
        """
        Maps a user supplied key onto the key used for the dictionary lookup.

        String keys which spell a number are cast to an integer or, failing
        that, to a float. This avoids users having to cast strings
        themselves. The cast key is only used when it is actually present
        in the vector; otherwise the key is returned untouched, so string
        keys (including ones that merely look numeric) keep working.

        :param item: The key as supplied by the caller.

        :returns: The key to use when accessing ``self.value``.
        """
        if isinstance(item, str):
            for cast in (int, float):
                try:
                    candidate = cast(item)
                except ValueError:
                    # Not parsable by this cast; try the next one.
                    continue
                if candidate in self.value:
                    return candidate
        return item

    def __getitem__(self, item: Union[int, str, float]) -> Scalar:
        """
        Returns the entry stored under ``item``.

        :param item: The key of the entry. Numeric strings are accepted in
                     place of integer or float keys.

        :returns: The entry stored under the key.

        :raises KeyError: If the vector has no such key.
        """
        assert self.value is not None
        return self.value[self._resolve_key(item)]

    def __contains__(self, item) -> bool:
        """
        Returns whether ``item`` is a key of the vector. Numeric strings
        are accepted in place of integer or float keys.
        """
        assert self.value is not None
        return self._resolve_key(item) in self.value

    def __iter__(self) -> Iterable[Union[str, int, float]]:
        """Iterates over the keys of the vector."""
        assert self.value is not None
        return iter(self.value)

    def __len__(self) -> int:
        """Returns the number of entries in the vector."""
        assert self.value is not None
        return len(self.value)

    def size(self) -> int:
        """
        Returns the size of the vector.

        :returns: The size of the vector.
        """
        assert self.value is not None
        return len(self.value)

    def mean(self) -> float:
        """
        Returns the mean of the value vector.

        :returns: The mean value across all values in the vector.

        :raises ZeroDivisionError: If the vector is empty.
        """
        assert self.value is not None
        return self.count() / self.size()

    def count(self) -> float:
        """
        Returns the count (sum) of all values in the vector.

        :returns: The sum of all vector values.
        """
        assert self.value is not None
        # Entries are expected to be Scalars, whose number lives in their
        # ``value`` attribute; plain numbers are tolerated as well.
        return sum(
            (
                float(
                    entry.value if isinstance(entry, Statistic) else entry
                )
                for entry in self.value.values()
            ),
            0.0,
        )

    def children(
        self,
        predicate: Optional[Callable[[str], bool]] = None,
        recursive: bool = False,
    ) -> List["AbstractStat"]:
        """
        Returns the statistics held by this vector.

        NOTE: the children of every entry are always collected as well; the
        ``recursive`` argument is accepted for interface compatibility but
        is not consulted by this implementation.

        :param predicate: An optional filter called with the key of an
                          entry. When given, only entries with a string key
                          for which it returns a true value are included.
        :param recursive: Not consulted (see the note above).

        :returns: The matching entries followed by their own children.
        """
        to_return: List["AbstractStat"] = []
        for attr, obj in self.value.items():
            if not isinstance(obj, AbstractStat):
                continue
            # Without a predicate everything matches. With one, only string
            # keys can match since the predicate operates on names.
            if not predicate or (isinstance(attr, str) and predicate(attr)):
                to_return.append(obj)
            to_return = to_return + obj.children(
                predicate=predicate, recursive=True
            )
        return to_return
class Vector2d(Statistic):
    """
    A 2D vector of scalar values.

    The entries are stored in a dictionary (``value``) which maps a key
    (a string, an integer, or a float) to a Vector. All the Vectors must be
    of the same size.
    """

    value: Dict[Union[str, int, float], Vector]

    def __init__(
        self,
        value: Dict[Union[str, int, float], Vector],
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: The mapping of keys to the Vectors of the 2d vector.
        :param type: An optional label naming the kind of statistic.
        :param description: An optional description of the statistic.
        """
        # An empty mapping has no Vectors which could differ in length, so
        # only two or more distinct sizes are an error.
        assert (
            len({vector.size() for vector in value.values()}) <= 1
        ), "All the Vectors in the 2d Vector are not of equal length."

        super().__init__(
            value=value,
            type=type,
            description=description,
        )

    def _resolve_key(self, item: Any) -> Any:
        """
        Maps a user supplied key onto the key used for the dictionary lookup.

        String keys which spell a number are cast to an integer or, failing
        that, to a float. This avoids users having to cast strings
        themselves. The cast key is only used when it is actually present
        in the 2d vector; otherwise the key is returned untouched, so string
        keys (including ones that merely look numeric) keep working.

        :param item: The key as supplied by the caller.

        :returns: The key to use when accessing ``self.value``.
        """
        if isinstance(item, str):
            for cast in (int, float):
                try:
                    candidate = cast(item)
                except ValueError:
                    # Not parsable by this cast; try the next one.
                    continue
                if candidate in self.value:
                    return candidate
        return item

    def x_size(self) -> int:
        """Returns the number of elements in the x dimension."""
        assert self.value is not None
        return len(self.value)

    def y_size(self) -> int:
        """Returns the number of elements in the y dimension."""
        assert self.value is not None
        # Every Vector has the same size (checked on construction), so any
        # of them can be measured. The keys are not necessarily integers,
        # so no particular key (e.g. 0) may be assumed to exist.
        first = next(iter(self.value.values()), None)
        return 0 if first is None else first.size()

    def size(self) -> int:
        """Returns the total number of elements."""
        return self.x_size() * self.y_size()

    def __len__(self) -> int:
        """Returns the number of elements in the x dimension."""
        return self.x_size()

    def __iter__(self):
        """Iterates over the keys of the x dimension."""
        assert self.value is not None
        return iter(self.value)

    def total(self) -> Union[float, int]:
        """The total (sum) of all the entries in the 2d vector."""
        assert self.value is not None
        total = 0
        for vector in self.value.values():
            # The Scalars of a Vector live in its ``value`` dictionary.
            for scalar in vector.value.values():
                total += scalar.value
        return total

    def __getitem__(self, index: Union[str, int, float]) -> Vector:
        """
        Returns the Vector stored under ``index``.

        :param index: The key of the Vector. Numeric strings are accepted in
                      place of integer or float keys.

        :returns: The Vector stored under the key.

        :raises KeyError: If the 2d vector has no such key.
        """
        assert self.value is not None
        return self.value[self._resolve_key(index)]

    def children(
        self,
        predicate: Optional[Callable[[str], bool]] = None,
        recursive: bool = False,
    ) -> List["AbstractStat"]:
        """
        Returns the statistics held by this 2d vector.

        NOTE: the children of every Vector are always collected as well; the
        ``recursive`` argument is accepted for interface compatibility but
        is not consulted by this implementation.

        :param predicate: An optional filter called with the key of an
                          entry. When given, only entries with a string key
                          for which it returns a true value are included.
        :param recursive: Not consulted (see the note above).

        :returns: The matching Vectors followed by their own children.
        """
        to_return: List["AbstractStat"] = []
        for attr, obj in self.value.items():
            # Without a predicate everything matches. With one, only string
            # keys can match since the predicate operates on names.
            if not predicate or (isinstance(attr, str) and predicate(attr)):
                to_return.append(obj)
            to_return = to_return + obj.children(
                predicate=predicate, recursive=True
            )
        return to_return

    def __contains__(self, item) -> bool:
        """
        Returns whether ``item`` is a key of the 2d vector. Numeric strings
        are accepted in place of integer or float keys.
        """
        assert self.value is not None
        return self._resolve_key(item) in self.value
class Distribution(Vector):
    """
    A statistic describing a distribution.

    The samples are grouped into one or more bins and ``value`` holds the
    value of each bin, e.g., ``value[3]`` is the value of the 4th bin.
    It is assumed each bin is of equal size.
    """

    min: Union[float, int]
    max: Union[float, int]
    num_bins: int
    bin_size: Union[float, int]
    sum: Optional[int]
    sum_squared: Optional[int]
    underflow: Optional[int]
    overflow: Optional[int]
    logs: Optional[float]

    def __init__(
        self,
        value: Dict[Union[int, float], Scalar],
        min: Union[float, int],
        max: Union[float, int],
        num_bins: int,
        bin_size: Union[float, int],
        sum: Optional[int] = None,
        sum_squared: Optional[int] = None,
        underflow: Optional[int] = None,
        overflow: Optional[int] = None,
        logs: Optional[float] = None,
        description: Optional[str] = None,
    ):
        """
        Stores the bins along with the summary figures of the distribution.

        :param value: The mapping of bin keys to the Scalar of each bin.
        :param min: Stored as the ``min`` attribute.
        :param max: Stored as the ``max`` attribute.
        :param num_bins: The number of bins; must be at least 1.
        :param bin_size: The size of a bin; must not be negative.
        :param sum: Stored as the ``sum`` attribute.
        :param sum_squared: Stored as the ``sum_squared`` attribute.
        :param underflow: Stored as the ``underflow`` attribute.
        :param overflow: Stored as the ``overflow`` attribute.
        :param logs: Stored as the ``logs`` attribute.
        :param description: An optional description of the statistic.
        """
        # The type label of a distribution is always "Distribution".
        super().__init__(value, "Distribution", description)

        # The range and the layout of the bins.
        self.min, self.max = min, max
        self.num_bins, self.bin_size = num_bins, bin_size

        # The optional summary figures.
        self.sum = sum
        self.underflow, self.overflow = underflow, overflow
        self.logs = logs
        self.sum_squared = sum_squared

        # Sanity checks on the basic shape of a distribution.
        assert self.bin_size >= 0
        assert self.num_bins >= 1
class SparseHist(Vector):
    """
    A Sparse Histogram of values. A sparse histogram simply counts the
    frequency of each value in a sample. Ergo, it is, in effect, a
    dictionary of values mapped to their count.
    """

    def __init__(
        self,
        value: Dict[float, Scalar],
        description: Optional[str] = None,
    ):
        """
        :param value: The mapping of each sampled value to its count.
        :param description: An optional description of the statistic.
        """
        # The type label of a sparse histogram is always "SparseHist".
        super().__init__(
            value=value,
            type="SparseHist",
            description=description,
        )

    def size(self) -> int:
        """The number of unique sampled values."""
        return len(self.value)

    def count(self) -> int:
        """
        Returns the total number of samples.

        :returns: The sum of the counts of all sampled values.
        """
        assert self.value is not None
        # The counts are held in Scalars, which cannot be added up directly;
        # their number lives in the ``value`` attribute. Plain numbers are
        # tolerated as well.
        return sum(
            entry.value if isinstance(entry, Statistic) else entry
            for entry in self.value.values()
        )