stdlib: Add tests for PyStats's Vector and fix bugs

The big thing missing from the Vector stats was that each position in
the vector could have its own unique ID (a str, float, or int) and each
position in the vector can have its own description. Therefore, to add
this, the Vector is represented as a dictionary mapping the unique ID to
a PyStats Scalar (which can have its own unique description).

Change-Id: I3a8634f43298f6491300cf5a4f9d25dee8101808
This commit is contained in:
Bobby R. Bruce
2024-03-26 01:45:00 -07:00
parent 3c86175d08
commit 252dbe9c72
10 changed files with 365 additions and 60 deletions

View File

@@ -85,6 +85,11 @@ class SerializableStat:
return value
elif isinstance(value, datetime):
return value.replace(microsecond=0).isoformat()
elif isinstance(value, Dict):
d = {}
for k, v in value.items():
d[self.__process_json_value(k)] = self.__process_json_value(v)
return d
elif isinstance(value, list):
return [self.__process_json_value(v) for v in value]
elif isinstance(value, StorageType):

View File

@@ -27,8 +27,8 @@
from abc import ABC
from typing import (
Any,
Dict,
Iterable,
List,
Optional,
Union,
)
@@ -90,41 +90,56 @@ class Vector(Statistic):
An Python statistics which representing a vector of Scalar values.
"""
value: List[Union[int, float]]
def __init__(
    self,
    value: Dict[Union[str, int, float], Scalar],
    type: Optional[str] = None,
    description: Optional[str] = None,
):
    """
    Creates a Vector from a mapping of element IDs to Scalars.

    :param value: Maps each element's unique ID (a str, int, or float) to
                  the Scalar stored at that position.
    :param type: Forwarded unchanged to the parent constructor.
    :param description: Forwarded unchanged to the parent constructor.
    """
    # The mapping is passed through as-is. Wrapping it in `list()` would
    # keep only the keys and drop the Scalars.
    super().__init__(
        value=value,
        type=type,
        description=description,
    )
def __getitem__(self, index: Union[int, str, float]) -> Scalar:
    """
    Returns the element stored under `index`.

    A string made up only of digits is converted to an `int` before the
    lookup; any other string for which `str.isnumeric()` holds is
    converted to a `float`. All other strings, and non-string indices,
    are used unchanged.

    :param index: The ID of the element to fetch.

    :returns: Whatever `self.value` holds under the (possibly converted)
              index.
    """
    assert self.value is not None
    # In the case of strings, we cast them to integers or floats if they
    # are numeric. This avoids users having to cast strings to integers.
    # NOTE: `str` has no `isindex()` method; `isdigit()` is the check that
    # recognises integer-like strings.
    if isinstance(index, str):
        if index.isdigit():
            index = int(index)
        elif index.isnumeric():
            index = float(index)
    return self.value[index]
def size(self) -> int:
    """
    Returns the size of the vector.

    :returns: The number of entries held in `self.value`.
    """
    # Identity comparison is the idiomatic (and cheaper) check for None.
    assert self.value is not None
    return len(self.value)
def mean(self) -> float:
    """
    Returns the mean of the value vector.

    Computed as `self.count()` divided by `self.size()`, so an empty
    vector raises `ZeroDivisionError`.

    :returns: The mean value across all values in the vector.
    """
    assert self.value is not None
    # `self.value` is keyed by element ID, so the mean has to be taken over
    # the stored elements (via `count()`), not over the container itself.
    return self.count() / self.size()
def count(self) -> float:
    """
    Returns the count (sum) of all values in the vector.

    Every stored element is converted with `float()` before being added.

    :returns: The sum of all vector values.
    """
    assert self.value is not None
    # Sum the stored elements, not the keys: iterating the mapping itself
    # would add up the element IDs. The attribute is `value`, not `values`.
    return sum(float(element) for element in self.value.values())
class Distribution(Vector):

View File

@@ -183,29 +183,44 @@ def __get_distribution(statistic: _m5.stats.DistInfo) -> Distribution:
def __get_vector(statistic: _m5.stats.VectorInfo) -> Vector:
    """
    Builds a Vector from a vector statistic.

    Each of the `statistic.size` positions becomes one Scalar. The Scalar
    is keyed by the position's sub-name when one is present (converted to
    an `int` or `float` when the name is numeric) and by the position's
    integer index otherwise. Its description is the position's
    sub-description when present, falling back to `statistic.desc`.

    :param statistic: The vector statistic to convert.

    :returns: A Vector mapping each element ID to its Scalar.
    """
    vec: Dict[Union[str, int, float], Scalar] = {}

    for index in range(statistic.size):
        # All the values in a Vector are Scalar values
        value = statistic.value[index]
        assert isinstance(value, (float, int))

        # Sometimes elements within a vector are defined by their name. Other
        # times they have no name. When a name is not available, we name the
        # stat the index value. The length check guards against `subnames`
        # being shorter than the vector.
        if len(statistic.subnames) > index and statistic.subnames[index]:
            index_subname = str(statistic.subnames[index])
            if index_subname.isdigit():
                index_subname = int(index_subname)
            elif index_subname.isnumeric():
                index_subname = float(index_subname)
        else:
            index_subname = index

        # Prefer the per-element description; fall back to the description
        # of the vector as a whole when none is given for this position.
        if len(statistic.subdescs) > index and statistic.subdescs[index]:
            index_subdesc = str(statistic.subdescs[index])
        else:
            index_subdesc = statistic.desc

        vec[index_subname] = Scalar(
            value=value,
            unit=statistic.unit,
            description=index_subdesc,
            # ScalarInfo uses the C++ `double`.
            datatype=StorageType["f64"],
        )

    return Vector(
        vec,
        type="Vector",
        description=statistic.desc,
    )
def _prepare_stats(group: _m5.stats.Group):