stdlib: Add tests for PyStats's Vector and fix bugs

The big thing missing from the Vector stats was that each position in the vector can have its own unique ID (a str, float, or int) and its own description. Therefore, to add this, the Vector is represented as a dictionary mapping each unique ID to a PyStats Scalar (which can have its own unique description). Change-Id: I3a8634f43298f6491300cf5a4f9d25dee8101808
2024-03-26 01:45:00 -07:00
parent 3c86175d08
commit 252dbe9c72
10 changed files with 365 additions and 60 deletions
--- a/src/python/m5/ext/pystats/serializable_stat.py
+++ b/src/python/m5/ext/pystats/serializable_stat.py
@@ -85,6 +85,11 @@ class SerializableStat:
            return value
        elif isinstance(value, datetime):
            return value.replace(microsecond=0).isoformat()
+        elif isinstance(value, Dict):
+            d = {}
+            for k, v in value.items():
+                d[self.__process_json_value(k)] = self.__process_json_value(v)
+            return d
        elif isinstance(value, list):
            return [self.__process_json_value(v) for v in value]
        elif isinstance(value, StorageType):
--- a/src/python/m5/ext/pystats/statistic.py
+++ b/src/python/m5/ext/pystats/statistic.py
@@ -27,8 +27,8 @@
 from abc import ABC
 from typing import (
    Any,
+    Dict,
    Iterable,
-    List,
    Optional,
    Union,
 )
@@ -90,41 +90,56 @@ class Vector(Statistic):
    An Python statistics which representing a vector of Scalar values.
    """

-    value: List[Union[int, float]]
-
    def __init__(
        self,
-        value: Iterable[Union[int, float]],
+        value: Dict[Union[str, int, float], Scalar],
        type: Optional[str] = None,
        description: Optional[str] = None,
    ):
        super().__init__(
-            value=list(value),
+            value=value,  # ID -> Scalar mapping, passed on without copying
            type=type,
            description=description,
        )

+    def __getitem__(self, index: Union[int, str, float]) -> Scalar:
+        assert self.value != None
+        # A string that is not itself a key is cast to an int or float if it
+        # looks numeric, so callers need not cast numeric IDs themselves.
+        if isinstance(index, str) and index not in self.value:
+            if index.isdecimal():
+                index = int(index)
+            elif index.replace(".", "", 1).isdecimal():
+                index = float(index)
+        return self.value[index]
+
+    def size(self) -> int:
+        """
+        Returns the number of elements (ID-to-Scalar entries) in the vector.
+
+        :returns: The number of entries in the underlying value dictionary.
+        """
+        assert self.value != None
+        return len(self.value)
+
    def mean(self) -> float:
        """
        Returns the mean of the value vector.

-        :returns: The mean value across all bins.
+        :returns: The sum of all values divided by the number of values.
        """
        assert self.value != None
-        assert isinstance(self.value, List)

-        from statistics import mean as statistics_mean
-
-        return statistics_mean(self.value)
+        return self.count() / self.size()

    def count(self) -> float:
        """
-        Returns the count across all the bins.
+        Returns the count (sum) of all values in the vector.

-        :returns: The sum of all bin values.
+        :returns: The sum of all vector values.
        """
        assert self.value != None
-        return sum(self.value)
+        return sum(float(self.value[key]) for key in self.value)


 class Distribution(Vector):
--- a/src/python/m5/stats/gem5stats.py
+++ b/src/python/m5/stats/gem5stats.py
@@ -183,29 +183,44 @@ def __get_distribution(statistic: _m5.stats.DistInfo) -> Distribution:


 def __get_vector(statistic: _m5.stats.VectorInfo) -> Vector:
-    to_add = dict()
+    # Maps each element's ID (sub-name, else position) to its Scalar.
+    vec: Dict[Union[str, int, float], Scalar] = {}

    for index in range(statistic.size):
        # All the values in a Vector are Scalar values
        value = statistic.value[index]
-        unit = statistic.unit
-        description = statistic.subdescs[index]
-        # ScalarInfo uses the C++ `double`.
-        datatype = StorageType["f64"]
+        assert isinstance(value, (float, int))

        # Sometimes elements within a vector are defined by their name. Other
        # times they have no name. When a name is not available, we name the
        # stat the index value.
-        if str(statistic.subnames[index]):
-            index_string = str(statistic.subnames[index])
+        if len(statistic.subnames) > index and statistic.subnames[index]:
+            index_subname = str(statistic.subnames[index])
+            if index_subname.isdecimal():
+                index_subname = int(index_subname)
+            elif index_subname.replace(".", "", 1).isdecimal():
+                index_subname = float(index_subname)
        else:
-            index_string = str(index)
+            index_subname = index

-        to_add[index_string] = Scalar(
-            value=value, unit=unit, description=description, datatype=datatype
+        # Use the vector-level description when there is no sub-description.
+        if len(statistic.subdescs) > index and statistic.subdescs[index]:
+            index_subdesc = str(statistic.subdescs[index])
+        else:
+            index_subdesc = statistic.desc
+
+        vec[index_subname] = Scalar(
+            value=value,
+            unit=statistic.unit,
+            description=index_subdesc,
+            datatype=StorageType["f64"],
        )

-    return Vector(scalar_map=to_add)
+    return Vector(
+        vec,
+        type="Vector",
+        description=statistic.desc,
+    )


 def _prepare_stats(group: _m5.stats.Group):