Bugs fixed of note: 1. The 'find' method has been fixed to work. This involved making 'children' a method implemented per-subclass as required. 2. The 'get_all_stats_of_name' method has been removed. It was not working correctly at all and largely duplicated what 'find' does. 3. The functionality to get an element in a vector via an attribute call (i.e., self.vector1 == self.vector[1]) has been implemented, thus maintaining backwards compatibility with the regular Python stats. Change-Id: I31a4ccc723937018a3038dcdf491c82629ddbbb2
356 lines
10 KiB
Python
356 lines
10 KiB
Python
# Copyright (c) 2021 The Regents of The University of California
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met: redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer;
|
|
# redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution;
|
|
# neither the name of the copyright holders nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
from abc import ABC
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Dict,
|
|
Iterable,
|
|
List,
|
|
Optional,
|
|
Union,
|
|
)
|
|
|
|
from .abstract_stat import AbstractStat
|
|
from .storagetype import StorageType
|
|
|
|
|
|
class Statistic(ABC, AbstractStat):
    """
    Common abstract base for every Python statistic.

    A statistic carries a payload (``value``) plus an optional type label
    and an optional free-text description.
    """

    value: Any
    type: Optional[str]
    description: Optional[str]

    def __init__(
        self,
        value: Any,
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: The payload held by this statistic.
        :param type: Optional label naming the kind of statistic.
        :param description: Optional description of the statistic.
        """
        # The assignment order is kept stable so that the order of the
        # instance attributes does not change.
        self.value = value
        self.type = type
        self.description = description

    def __repr__(self):
        """Return the string form of the stored value."""
        text = str(self.value)
        return text
|
|
|
|
|
|
class Scalar(Statistic):
    """
    A Python statistic holding a single numeric value.
    """

    value: Union[float, int]
    unit: Optional[str]
    datatype: Optional[StorageType]

    def __init__(
        self,
        value: Union[float, int],
        unit: Optional[str] = None,
        description: Optional[str] = None,
        datatype: Optional[StorageType] = None,
    ):
        """
        :param value: The numeric value of the statistic.
        :param unit: Optional unit the value is expressed in.
        :param description: Optional description of the statistic.
        :param datatype: Optional storage type of the value.
        """
        # The statistic type label is fixed to "Scalar" for this class.
        super().__init__(value, "Scalar", description)
        self.unit = unit
        self.datatype = datatype
|
|
|
|
|
|
class Vector(Statistic):
    """
    A Python statistic representing a vector of Scalar values.

    The entries are stored in a dictionary (``value``) mapping a key
    (string, integer or float) to a Scalar.
    """

    value: Dict[Union[str, int, float], Scalar]

    def __init__(
        self,
        value: Dict[Union[str, int, float], Scalar],
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: Mapping of keys to the Scalar entries of the vector.
        :param type: Optional label naming the kind of statistic.
        :param description: Optional description of the statistic.
        """
        super().__init__(
            value=value,
            type=type,
            description=description,
        )

    @staticmethod
    def _numeric_key(key):
        """
        Return ``key`` cast to an int or a float when it is a string holding
        a number; any other key is returned unchanged.
        """
        if not isinstance(key, str):
            return key
        # Attempting the conversion is more reliable than ``str.isdigit`` /
        # ``str.isnumeric``: those accept characters (e.g. superscripts or
        # fractions) that ``int``/``float`` reject, and reject "1.5".
        for cast in (int, float):
            try:
                return cast(key)
            except ValueError:
                continue
        return key

    def _resolve_key(self, key):
        """
        Return the key to use for a lookup in ``self.value``.

        The numeric interpretation of a string key is preferred when it is
        present in the vector; otherwise the key is used exactly as given.
        """
        converted = self._numeric_key(key)
        return converted if converted in self.value else key

    def __getitem__(self, item: Union[int, str, float]) -> Scalar:
        """
        Return the entry stored under ``item``.

        Numeric strings are cast to integers or floats so users do not have
        to cast them themselves.

        :raises KeyError: If no entry matches ``item``.
        """
        assert self.value is not None
        return self.value[self._resolve_key(item)]

    def __contains__(self, item) -> bool:
        """Return True if ``item`` (or its numeric form) is a key."""
        assert self.value is not None
        return self._resolve_key(item) in self.value

    def __iter__(self) -> Iterable[Union[str, int, float]]:
        """Iterate over the keys of the vector."""
        assert self.value is not None
        return iter(self.value)

    def __len__(self) -> int:
        """Return the number of entries in the vector."""
        assert self.value is not None
        return len(self.value)

    def size(self) -> int:
        """
        Returns the size of the vector.

        :returns: The size of the vector.
        """
        assert self.value is not None
        return len(self.value)

    def mean(self) -> float:
        """
        Returns the mean of the value vector.

        :returns: The mean value across all values in the vector.
        :raises ZeroDivisionError: If the vector has no entries.
        """
        assert self.value is not None

        return self.count() / self.size()

    def count(self) -> float:
        """
        Returns the count (sum) of all values in the vector.

        :returns: The sum of all vector values.
        """
        assert self.value is not None
        # Entries are expected to be Scalars, whose number lives in
        # ``.value``; plain numbers are accepted as well.
        return sum(
            (
                float(entry.value if isinstance(entry, Statistic) else entry)
                for entry in self.value.values()
            ),
            0.0,
        )

    def children(
        self,
        predicate: Optional[Callable[[str], bool]] = None,
        recursive: bool = False,
    ) -> List["AbstractStat"]:
        """
        Collect the statistics held by this vector.

        :param predicate: Optional filter applied to string keys. When
            given, only entries whose key is a string accepted by the
            predicate are collected; when omitted, every entry is collected.
        :param recursive: NOTE(review): currently not consulted; the
            children of every entry are always gathered recursively.

        :returns: The collected statistics.
        """
        to_return = []
        for attr, obj in self.value.items():
            if not isinstance(obj, AbstractStat):
                continue
            if not predicate or (isinstance(attr, str) and predicate(attr)):
                to_return.append(obj)
            to_return = to_return + obj.children(
                predicate=predicate, recursive=True
            )
        return to_return
|
|
|
|
|
|
class Vector2d(Statistic):
    """
    A 2D vector of scalar values.

    Stored as a dictionary (``value``) mapping a key to a Vector; all of
    the Vectors must be of equal size.
    """

    value: Dict[Union[str, int, float], Vector]

    def __init__(
        self,
        value: Dict[Union[str, int, float], Vector],
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: Mapping of keys to the Vectors making up the rows.
        :param type: Optional label naming the kind of statistic.
        :param description: Optional description of the statistic.
        """
        assert (
            len({vector.size() for vector in value.values()}) == 1
        ), "All the Vectors in the 2d Vector are not of equal length."

        super().__init__(
            value=value,
            type=type,
            description=description,
        )

    @staticmethod
    def _numeric_key(key):
        """
        Return ``key`` cast to an int or a float when it is a string holding
        a number; any other key is returned unchanged.
        """
        if not isinstance(key, str):
            return key
        # Attempting the conversion is more reliable than ``str.isdigit`` /
        # ``str.isnumeric``: those accept characters that ``int``/``float``
        # reject, and reject strings such as "1.5".
        for cast in (int, float):
            try:
                return cast(key)
            except ValueError:
                continue
        return key

    def _resolve_key(self, key):
        """
        Return the key to use for a lookup in ``self.value``.

        The numeric interpretation of a string key is preferred when it is
        present; otherwise the key is used exactly as given.
        """
        converted = self._numeric_key(key)
        return converted if converted in self.value else key

    def x_size(self) -> int:
        """Returns the number of elements in the x dimension."""
        assert self.value is not None
        return len(self.value)

    def y_size(self) -> int:
        """Returns the number of elements in the y dimension."""
        assert self.value is not None
        # Every row has the same size (checked at construction), so any row
        # can be measured; this avoids assuming a key ``0`` exists.
        first_row = next(iter(self.value.values()), None)
        return 0 if first_row is None else len(first_row)

    def size(self) -> int:
        """Returns the total number of elements."""
        return self.x_size() * self.y_size()

    def __len__(self) -> int:
        """Returns the number of elements in the x dimension."""
        return self.x_size()

    def __iter__(self):
        """Iterate over the keys of the x dimension."""
        assert self.value is not None
        return iter(self.value)

    def total(self) -> Union[int, float]:
        """The total (sum) of all the entries in the 2d vector."""
        assert self.value is not None
        # Each row is a Vector whose Scalars live in its ``value`` dict.
        return sum(
            scalar.value
            for vector in self.value.values()
            for scalar in vector.value.values()
        )

    def __getitem__(self, index: Union[str, int, float]) -> Vector:
        """
        Return the Vector stored under ``index``.

        Numeric strings are cast to integers or floats so users do not have
        to cast them themselves.

        :raises KeyError: If no entry matches ``index``.
        """
        assert self.value is not None
        return self.value[self._resolve_key(index)]

    def children(
        self,
        predicate: Optional[Callable[[str], bool]] = None,
        recursive: bool = False,
    ) -> List["AbstractStat"]:
        """
        Collect the Vectors held by this 2D vector and their children.

        :param predicate: Optional filter applied to string keys. When
            given, only entries whose key is a string accepted by the
            predicate are collected; when omitted, every entry is collected.
        :param recursive: NOTE(review): currently not consulted; the
            children of every entry are always gathered recursively.

        :returns: The collected statistics.
        """
        to_return = []
        for attr, obj in self.value.items():
            if not predicate or (isinstance(attr, str) and predicate(attr)):
                to_return.append(obj)
            to_return = to_return + obj.children(
                predicate=predicate, recursive=True
            )
        return to_return

    def __contains__(self, item) -> bool:
        """Return True if ``item`` (or its numeric form) is a key."""
        assert self.value is not None
        return self._resolve_key(item) in self.value
|
|
|
|
|
|
class Distribution(Vector):
    """
    A statistic type that stores information relating to distributions.

    A distribution is split into ``num_bins`` bins (at least one). The
    values correspond to the value of each bin, e.g., ``value[3]`` is the
    value of the 4th bin.

    It is assumed each bin is of equal size.
    """

    min: Union[float, int]
    max: Union[float, int]
    num_bins: int
    bin_size: Union[float, int]
    sum: Optional[int]
    sum_squared: Optional[int]
    underflow: Optional[int]
    overflow: Optional[int]
    logs: Optional[float]

    def __init__(
        self,
        value: Dict[Union[int, float], Scalar],
        min: Union[float, int],
        max: Union[float, int],
        num_bins: int,
        bin_size: Union[float, int],
        sum: Optional[int] = None,
        sum_squared: Optional[int] = None,
        underflow: Optional[int] = None,
        overflow: Optional[int] = None,
        logs: Optional[float] = None,
        description: Optional[str] = None,
    ):
        """
        :param value: Mapping of bin keys to the Scalar held by each bin.
        :param min: Stored as ``self.min``.
        :param max: Stored as ``self.max``.
        :param num_bins: The number of bins; must be at least 1.
        :param bin_size: The size of each bin; must not be negative.
        :param sum: Optional, stored as ``self.sum``.
        :param sum_squared: Optional, stored as ``self.sum_squared``.
        :param underflow: Optional, stored as ``self.underflow``.
        :param overflow: Optional, stored as ``self.overflow``.
        :param logs: Optional, stored as ``self.logs``.
        :param description: Optional description of the statistic.
        """
        # The statistic type label is fixed to "Distribution".
        super().__init__(value, "Distribution", description)

        # The assignment order is kept stable so that the order of the
        # instance attributes does not change.
        self.min = min
        self.max = max
        self.num_bins = num_bins
        self.bin_size = bin_size
        self.sum = sum
        self.underflow = underflow
        self.overflow = overflow
        self.logs = logs
        self.sum_squared = sum_squared

        # Sanity checks on the basic shape of the distribution.
        assert self.bin_size >= 0
        assert self.num_bins >= 1
|
|
|
|
|
|
class SparseHist(Vector):
    """
    A sparse histogram of values.

    A sparse histogram simply counts the frequency of each value in a
    sample. Ergo, it is, in effect, a dictionary of values mapped to their
    count.
    """

    def __init__(
        self,
        value: Dict[float, Scalar],
        description: Optional[str] = None,
    ):
        """
        :param value: Mapping of each sampled value to its count.
        :param description: Optional description of the statistic.
        """
        super().__init__(
            value=value,
            type="SparseHist",
            description=description,
        )

    def size(self) -> int:
        """The number of unique sampled values."""
        return len(self.value)

    def count(self) -> int:
        """
        Returns the total number of samples.

        :returns: The sum of the counts of every sampled value.
        """
        assert self.value is not None
        # The counts are typed as Scalars, which ``sum`` cannot add
        # directly; read the number from ``.value``. Plain numbers are
        # accepted as well.
        return sum(
            entry.value if isinstance(entry, Statistic) else entry
            for entry in self.value.values()
        )
|