From f11617736e886857aa01fffce0e7a1f869f2db5d Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Sun, 17 Jan 2021 20:52:31 -0800 Subject: [PATCH] base-stats,python: Add Python Stats This model is used to store and represent the "new" hierarchical stats at the Python level. Over time these classes may be extended with functions to ease the analysis of gem5 stats. Though, for this commit, such functions have been kept to a minimum. `m5/stats/gem5stats.py` contains functions for translating the gem5 `_m5.stats` statistics exposed via Pybind11 to the Python Stats model. For example: ``` import m5.stats.gem5stats as gem5stats simstat = gem5stats.get_simstat(root) ``` All the Python Stats model classes inherit from JsonSerializable, meaning they can be translated to JSON. For example: ``` import m5.stats.gem5stats as gem5stats simstat = gem5stats.get_simstat(root) with open('test.json', 'w') as f: simstat.dump(f) ``` The stats have also been exposed via the Python statistics API. Via the command line, a JSON output may be specified with the argument `--stats-file json://<file>` (e.g. `json://stats.json`). 
Change-Id: I253a869f6b6d8c0de4dbed708892ee0cc33c5665 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38615 Reviewed-by: Jason Lowe-Power Reviewed-by: Andreas Sandberg Maintainer: Jason Lowe-Power Tested-by: kokoro --- src/python/SConscript | 9 + src/python/m5/ext/pystats/__init__.py | 41 +++ src/python/m5/ext/pystats/group.py | 69 ++++ src/python/m5/ext/pystats/jsonserializable.py | 167 ++++++++++ src/python/m5/ext/pystats/simstat.py | 56 ++++ src/python/m5/ext/pystats/statistic.py | 206 ++++++++++++ src/python/m5/ext/pystats/storagetype.py | 42 +++ src/python/m5/ext/pystats/timeconversion.py | 39 +++ src/python/m5/stats/__init__.py | 26 +- src/python/m5/stats/gem5stats.py | 294 ++++++++++++++++++ 10 files changed, 945 insertions(+), 4 deletions(-) create mode 100644 src/python/m5/ext/pystats/__init__.py create mode 100644 src/python/m5/ext/pystats/group.py create mode 100644 src/python/m5/ext/pystats/jsonserializable.py create mode 100644 src/python/m5/ext/pystats/simstat.py create mode 100644 src/python/m5/ext/pystats/statistic.py create mode 100644 src/python/m5/ext/pystats/storagetype.py create mode 100644 src/python/m5/ext/pystats/timeconversion.py create mode 100644 src/python/m5/stats/gem5stats.py diff --git a/src/python/SConscript b/src/python/SConscript index 19f260a256..57d5578e7f 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -64,6 +64,15 @@ PySource('m5.ext', 'm5/ext/__init__.py') PySource('m5.ext.pyfdt', 'm5/ext/pyfdt/pyfdt.py') PySource('m5.ext.pyfdt', 'm5/ext/pyfdt/__init__.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/__init__.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/jsonserializable.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/group.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/simstat.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/statistic.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/storagetype.py') +PySource('m5.ext.pystats', 'm5/ext/pystats/timeconversion.py') +PySource('m5.stats', 
'm5/stats/gem5stats.py') + Source('pybind11/core.cc', add_tags='python') Source('pybind11/debug.cc', add_tags='python') Source('pybind11/event.cc', add_tags='python') diff --git a/src/python/m5/ext/pystats/__init__.py b/src/python/m5/ext/pystats/__init__.py new file mode 100644 index 0000000000..4ffac9a8c1 --- /dev/null +++ b/src/python/m5/ext/pystats/__init__.py @@ -0,0 +1,41 @@ +# Copyright (c) 2020 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from .jsonserializable import JsonSerializable +from .group import Group +from .simstat import SimStat +from .statistic import Statistic +from .storagetype import StorageType +from .timeconversion import TimeConversion + +__all__ = [ + "Group", + "SimStat", + "Statistic", + "TimeConversion", + "StorageType", + "JsonSerializable", + ] \ No newline at end of file diff --git a/src/python/m5/ext/pystats/group.py b/src/python/m5/ext/pystats/group.py new file mode 100644 index 0000000000..41a5633496 --- /dev/null +++ b/src/python/m5/ext/pystats/group.py @@ -0,0 +1,69 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from typing import Dict, List, Optional, Union + +from .jsonserializable import JsonSerializable +from .statistic import Scalar, Statistic +from .timeconversion import TimeConversion + +class Group(JsonSerializable): + """ + Used to create the hierarchical stats structure. A Group object contains a + map of labeled Groups, Statistics, Lists of Groups, or Lists of Statistics. + """ + + type: Optional[str] + time_conversion: Optional[TimeConversion] + + def __init__(self, type: Optional[str] = None, + time_conversion: Optional[TimeConversion] = None, + **kwargs: Dict[str, Union["Group",Statistic,List["Group"], + List["Statistic"]]]): + if type is None: + self.type = "Group" + else: + self.type = type + + self.time_conversion = time_conversion + + for key,value in kwargs.items(): + setattr(self, key, value) + +class Vector(Group): + """ + The Vector class is used to store vector information. However, in gem5 + Vectors, in practice, hold information that is more like a dictionary of + Scalar Values. This class may change, and may be merged into Group in + accordance with decisions made in relation to + https://gem5.atlassian.net/browse/GEM5-867. 
+ """ + def __init__(self, scalar_map: Dict[str,Scalar]): + super(Vector, self).__init__( + type="Vector", + time_conversion=None, + **scalar_map, + ) \ No newline at end of file diff --git a/src/python/m5/ext/pystats/jsonserializable.py b/src/python/m5/ext/pystats/jsonserializable.py new file mode 100644 index 0000000000..69b15f08fa --- /dev/null +++ b/src/python/m5/ext/pystats/jsonserializable.py @@ -0,0 +1,167 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from datetime import datetime +import json +from typing import Dict, List, Union, Any, IO + +from .storagetype import StorageType + +class JsonSerializable: + """ + Classes which inherit from JsonSerializable can be translated into JSON + using Python's json package. + + Usage + ----- + ``` + import m5.stats.gem5stats as gem5stats + + simstat = gem5stats.get_simstat(root) + print(simstat.dumps()) + ``` + """ + + def to_json(self) -> Dict: + """ + Translates the current object into a JSON dictionary. + + Returns + ------- + Dict + The JSON dictionary. + """ + + model_dct = {} + for key, value in self.__dict__.items(): + new_value = self.__process_json_value(value) + model_dct[key] = new_value + return model_dct + + def __process_json_value(self, + value: Any) -> Union[str,int,float,Dict,List,None]: + """ + Translate values into a value which can be handled by the Python stdlib + JSON package. + + Parameters + ---------- + value: Any + The value to be translated. + + Returns + ------- + Union[str,int,float,Dict,List,None] + A JSON-compatible value, or None if the value type is unsupported. + """ + + if isinstance(value, JsonSerializable): + return value.to_json() + elif isinstance(value, (str, int, float)): + return value + elif isinstance(value, datetime): + return value.replace(microsecond=0).isoformat() + elif isinstance(value, list): + return [self.__process_json_value(v) for v in value] + elif isinstance(value, StorageType): + return str(value.name) + + return None + + + def dumps(self, **kwargs) -> str: + """ + This function mirrors the Python stdlib JSON module method + `json.dumps`. It is used to obtain the gem5 statistics output to a + JSON string. + + Parameters + ---------- + root: Root + The root of the simulation. + + kwargs: Dict[str, Any] + Additional parameters to be passed to the `json.dumps` method. + + Returns + ------- + str + A string of the gem5 Statistics in a JSON format. 
+ + + Usage Example + ------------- + ``` + import m5.stats.gem5stats as gem5stats + + simstat = gem5stats.get_simstat(root) + print(simstat.dumps(indent=6)) + ``` + + The above will print the simulation statistic JSON string. The + indentation will be 6 (by default the indentation is 4). + """ + + # Setting the default indentation to something readable. + if 'indent' not in kwargs: + kwargs['indent'] = 4 + + return json.dumps(obj=self.to_json(), **kwargs) + + def dump(self, fp: IO[str], **kwargs) -> None: + """ + This function mirrors the Python stdlib JSON module method + `json.dump`. The object is translated to JSON and the result is + written to the specified stream. + + + Parameters + ---------- + fp: IO[str] + The Text IO stream to output the JSON to. + + **kwargs: + Additional parameters to be passed to the ``json.dump`` method. + + Usage + ----- + ``` + import m5.stats.gem5stats as gem5stats + + simstat = gem5stats.get_simstat(root) + with open("test.json", "w") as f: + simstat.dump(fp=f, indent=6) + ``` + + The above will dump the json output to the 'test.json' file. The + indentation will be of 6 (by default the indentation is 4). + """ + + # Setting the default indentation to something readable. + if 'indent' not in kwargs: + kwargs['indent'] = 4 + + json.dump(obj=self.to_json(), fp=fp, **kwargs) \ No newline at end of file diff --git a/src/python/m5/ext/pystats/simstat.py b/src/python/m5/ext/pystats/simstat.py new file mode 100644 index 0000000000..b65fc930d1 --- /dev/null +++ b/src/python/m5/ext/pystats/simstat.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from datetime import datetime +from typing import Dict, List, Optional, Union + +from .jsonserializable import JsonSerializable +from .group import Group +from .statistic import Statistic +from .timeconversion import TimeConversion + +class SimStat(JsonSerializable): + """ + Contains all the statistics for a given simulation. 
+ """ + + creation_time: Optional[datetime] + time_conversion: Optional[TimeConversion] + simulated_begin_time: Optional[Union[int, float]] + simulated_end_time: Optional[Union[int, float]] + + def __init__(self, creation_time: Optional[datetime], + time_conversion: Optional[TimeConversion], + simulated_begin_time: Optional[Union[int, float]], + simulated_end_time: Optional[Union[int, float]], + **kwargs: Dict[str, Union[Group,Statistic,List[Group]]]): + self.creation_time = creation_time + self.time_conversion = time_conversion + self.simulated_begin_time = simulated_begin_time + self.simulated_end_time = simulated_end_time + + for key,value in kwargs.items(): + setattr(self, key, value) \ No newline at end of file diff --git a/src/python/m5/ext/pystats/statistic.py b/src/python/m5/ext/pystats/statistic.py new file mode 100644 index 0000000000..00d479da9a --- /dev/null +++ b/src/python/m5/ext/pystats/statistic.py @@ -0,0 +1,206 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from abc import ABC +from typing import Any, Optional, Union, List + +from .jsonserializable import JsonSerializable +from .storagetype import StorageType + +class Statistic(ABC, JsonSerializable): + """ + The abstract base class for all Python statistics. + """ + + value: Any + type: Optional[str] + unit: Optional[str] + description: Optional[str] + datatype: Optional[StorageType] + + def __init__(self, value: Any, type: Optional[str] = None, + unit: Optional[str] = None, + description: Optional[str] = None, + datatype: Optional[StorageType] = None): + self.value = value + self.type = type + self.unit = unit + self.description = description + self.datatype = datatype + +class Scalar(Statistic): + """ + A scalar Python statistic type. + """ + + value: Union[float, int] + + def __init__(self, value: Any, + unit: Optional[str] = None, + description: Optional[str] = None, + datatype: Optional[StorageType] = None): + super(Scalar, self).__init__( + value=value, + type="Scalar", + unit=unit, + description=description, + datatype=datatype, + ) + +class BaseScalarVector(Statistic): + """ + An abstract base class for classes containing a vector of Scalar values. 
+ """ + value: List[Union[int,float]] + + def __init__(self, value: List[Union[int,float]], + type: Optional[str] = None, + unit: Optional[str] = None, + description: Optional[str] = None, + datatype: Optional[StorageType] = None): + super(BaseScalarVector, self).__init__( + value=value, + type=type, + unit=unit, + description=description, + datatype=datatype, + ) + + def mean(self) -> float: + """ + Returns the mean of the value vector. + + Returns + ------- + float + The mean value across all bins. + """ + assert(self.value is not None) + assert(isinstance(self.value, List)) + + from statistics import mean as statistics_mean + return statistics_mean(self.value) + + def count(self) -> int: + """ + Returns the count across all the bins. + + Returns + ------- + float + The sum of all bin values. + """ + assert(self.value is not None) + assert(isinstance(self.value, List)) + return sum(self.value) + + +class Distribution(BaseScalarVector): + """ + A statistic type that stores information relating to distributions. Each + distribution has a number of bins (>=1) + between `min` and `max`. The values correspond to the value of each bin. + E.g., `value[3]` is the value of the 4th bin. + + It is assumed each bucket is of equal size. 
+ """ + + value: List[int] + min: Union[float, int] + max: Union[float, int] + num_bins: int + bin_size: Union[float, int] + sum: Optional[int] + sum_squared: Optional[int] + underflow: Optional[int] + overflow: Optional[int] + logs: Optional[float] + + def __init__(self, value: List[int], + min: Union[float, int], + max: Union[float, int], + num_bins: int, + bin_size: Union[float, int], + sum: Optional[int] = None, + sum_squared: Optional[int] = None, + underflow: Optional[int] = None, + overflow: Optional[int] = None, + logs: Optional[float] = None, + unit: Optional[str] = None, + description: Optional[str] = None, + datatype: Optional[StorageType] = None): + super(Distribution, self).__init__( + value=value, + type="Distribution", + unit=unit, + description=description, + datatype=datatype, + ) + + self.min = min + self.max = max + self.num_bins = num_bins + self.bin_size = bin_size + self.sum = sum + self.underflow = underflow + self.overflow = overflow + self.logs = logs + self.sum_squared = sum_squared + + # These check some basic conditions of a distribution. + assert(self.bin_size >= 0) + assert(self.num_bins >= 1) + +class Accumulator(BaseScalarVector): + """ + A statistical type representing an accumulator. 
+ """ + + count: int + min: Union[int, float] + max: Union[int, float] + sum_squared: Optional[int] + + def __init__(self, value: List[Union[int,float]], + count: int, + min: Union[int, float], + max: Union[int, float], + sum_squared: Optional[int] = None, + unit: Optional[str] = None, + description: Optional[str] = None, + datatype: Optional[StorageType] = None): + super(Accumulator, self).__init__( + value=value, + type="Accumulator", + unit=unit, + description=description, + datatype=datatype, + ) + + self.count = count + self.min = min + self.max = max + self.sum_squared = sum_squared \ No newline at end of file diff --git a/src/python/m5/ext/pystats/storagetype.py b/src/python/m5/ext/pystats/storagetype.py new file mode 100644 index 0000000000..562cc8315e --- /dev/null +++ b/src/python/m5/ext/pystats/storagetype.py @@ -0,0 +1,42 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from enum import Enum +from typing import Dict + +class StorageType(Enum): + """ + An enum used to declare what C++ data type was used to store a value. + 32 or 64 bits; signed integer (s), unsigned integer (u), or float (f). + + E.g. 's64' indicates a 64 bit signed integer + """ + u32: str = "u32" + u64: str = "u64" + s32: str = "s32" + s64: str = "s64" + f32: str = "f32" + f64: str = "f64" \ No newline at end of file diff --git a/src/python/m5/ext/pystats/timeconversion.py b/src/python/m5/ext/pystats/timeconversion.py new file mode 100644 index 0000000000..92d0749ef9 --- /dev/null +++ b/src/python/m5/ext/pystats/timeconversion.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from typing import Optional + +class TimeConversion: + """ + A class for specifying a scale factor necessary to translate a simulation + time measurement (e.g. ticks) into seconds. + """ + scale_factor: float + description: Optional[str] + + def __init__(self, scale_factor: float, description: Optional[str] = None): + self.scale_factor = scale_factor + self.description = description \ No newline at end of file diff --git a/src/python/m5/stats/__init__.py b/src/python/m5/stats/__init__.py index cba1a32dfd..7f91487e77 100644 --- a/src/python/m5/stats/__init__.py +++ b/src/python/m5/stats/__init__.py @@ -42,6 +42,7 @@ import m5 import _m5.stats from m5.objects import Root from m5.params import isNullPointer +from .gem5stats import JsonOutputVistor from m5.util import attrdict, fatal # Stat exports @@ -182,6 +183,17 @@ def _hdf5Factory(fn, chunking=10, desc=True, formulas=True): return _m5.stats.initHDF5(fn, chunking, desc, formulas) +@_url_factory(["json"]) +def _jsonFactory(fn): + """Output stats in JSON format. 
+ + Example: + json://stats.json + + """ + + return JsonOutputVistor(fn) + def addStatVisitor(url): """Add a stat visitor specified using a URL string @@ -383,10 +395,16 @@ def dump(roots=None): prepare() for output in outputList: - if output.valid(): - output.begin() - _dump_to_visitor(output, roots=all_roots) - output.end() + if isinstance(output, JsonOutputVistor): + if not all_roots: + output.dump(Root.getInstance()) + else: + output.dump(all_roots) + else: + if output.valid(): + output.begin() + _dump_to_visitor(output, roots=all_roots) + output.end() def reset(): '''Reset all statistics to the base state''' diff --git a/src/python/m5/stats/gem5stats.py b/src/python/m5/stats/gem5stats.py new file mode 100644 index 0000000000..3afc7766c5 --- /dev/null +++ b/src/python/m5/stats/gem5stats.py @@ -0,0 +1,294 @@ +# Copyright (c) 2021 The Regents of The University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +This serves as the bridge between the gem5 statistics exposed via PyBind11 and +the Python Stats model. +""" + +from datetime import datetime +from typing import Dict, IO, List, Optional, Union + +import _m5.stats +from m5.objects import * +from m5.ext.pystats.group import * +from m5.ext.pystats.simstat import * +from m5.ext.pystats.statistic import * +from m5.ext.pystats.storagetype import * + +class JsonOutputVistor(): + """ + This is a helper visitor class used to include a JSON output via the stats + API (`src/python/m5/stats/__init__.py`). + """ + file: str + json_args: Dict + + def __init__(self, file: str, **kwargs): + """ + Parameters + ---------- + + file: str + The output file location in which the JSON will be dumped. + + kwargs: Dict[str, Any] + Additional parameters to be passed to the `json.dump` method. + """ + + self.file = file + self.json_args = kwargs + + def dump(self, roots: Union[List[SimObject], Root]) -> None: + """ + Dumps the stats of a simulation root (or list of roots) to the output + JSON file specified in the JsonOutputVistor constructor. + + WARNING: This dump assumes the statistics have already been prepared + for the target root. + + Parameters + ---------- + + roots: Union[List[SimObject], Root] + The Root, or List of roots, whose stats are to be dumped to JSON. 
+ """ + + with open(self.file, 'w') as fp: + simstat = get_simstat(root=roots, prepare_stats=False) + simstat.dump(fp=fp, **self.json_args) + +def get_stats_group(group: _m5.stats.Group) -> Group: + """ + Translates a gem5 Group object into a Python stats Group object. A Python + statistic Group object is a dictionary of labeled Statistic objects. Any + gem5 object passed to this will have its `getStats()` and `getStatGroups` + function called, and all the stats translated (inclusive of the stats + further down the hierarchy). + + Parameters + ---------- + group: _m5.stats.Group + The gem5 _m5.stats.Group object to be translated to be a Python stats + Group object. Typically this will be a gem5 SimObject. + + Returns + ------- + Group + The stats group object translated from the input gem5 object. + """ + + stats_dict = {} + + for stat in group.getStats(): + statistic = __get_statistic(stat) + if statistic is not None: + stats_dict[stat.name] = statistic + + for key in group.getStatGroups(): + stats_dict[key] = get_stats_group(group.getStatGroups()[key]) + + return Group(**stats_dict) + +def __get_statistic(statistic: _m5.stats.Info) -> Optional[Statistic]: + """ + Translates a _m5.stats.Info object into a Statistic object, to process + statistics at the Python level. + + Parameters + ---------- + statistic: Info + The Info object to be translated to a Statistic object. + + Returns + ------- + Optional[Statistic] + The Statistic object of the Info object. Returns None if Info object + cannot be translated. + """ + + assert(isinstance(statistic, _m5.stats.Info)) + statistic.prepare() + + if isinstance(statistic, _m5.stats.ScalarInfo): + return __get_scaler(statistic) + elif isinstance(statistic, _m5.stats.DistInfo): + return __get_distribution(statistic) + elif isinstance(statistic, _m5.stats.FormulaInfo): + # We don't do anything with Formula's right now. + # We may never do so, see https://gem5.atlassian.net/browse/GEM5-868. 
+ pass + elif isinstance(statistic, _m5.stats.VectorInfo): + return __get_vector(statistic) + + return None + +def __get_scaler(statistic: _m5.stats.ScalarInfo) -> Scalar: + value = statistic.value + unit = None # TODO https://gem5.atlassian.net/browse/GEM5-850. + description = statistic.desc + # ScalarInfo uses the C++ `double`. + datatype = StorageType["f64"] + + return Scalar( + value=value, + unit=unit, + description=description, + datatype=datatype, + ) + +def __get_distribution(statistic: _m5.stats.DistInfo) -> Distribution: + unit = None # TODO https://gem5.atlassian.net/browse/GEM5-850. + description = statistic.desc + value = statistic.values + bin_size = statistic.bucket_size + min = statistic.min_val + max = statistic.max_val + num_bins = len(value) + sum_val = statistic.sum + sum_squared = statistic.squares + underflow = statistic.underflow + overflow = statistic.overflow + logs = statistic.logs + # DistInfo uses the C++ `double`. + datatype = StorageType["f64"] + + return Distribution( + value=value, + min=min, + max=max, + num_bins=num_bins, + bin_size=bin_size, + sum = sum_val, + sum_squared = sum_squared, + underflow = underflow, + overflow = overflow, + logs = logs, + unit=unit, + description=description, + datatype=datatype, + ) + +def __get_vector(statistic: _m5.stats.VectorInfo) -> Vector: + to_add = dict() + + for index in range(statistic.size): + # All the values in a Vector are Scalar values + value = statistic.value[index] + unit = None # TODO https://gem5.atlassian.net/browse/GEM5-850. + description = statistic.subdescs[index] + # ScalarInfo uses the C++ `double`. + datatype = StorageType["f64"] + + # Sometimes elements within a vector are defined by their name. Other + # times they have no name. When a name is not available, we name the + # stat the index value. 
+ if str(statistic.subnames[index]): + index_string = str(statistic.subnames[index]) + else: + index_string = str(index) + + to_add[index_string] = Scalar( + value=value, + unit=unit, + description=description, + datatype=datatype, + ) + + return Vector(scalar_map=to_add) + +def _prepare_stats(group: _m5.stats.Group): + """ + Prepares the statistics for dumping. + """ + + group.preDumpStats() + + for stat in group.getStats(): + stat.prepare() + + for child in getStatGroups().values(): + _prepare_stats(child) + + +def get_simstat(root: Union[Root, List[SimObject]], + prepare_stats: bool = True) -> SimStat: + """ + This function will return the SimStat object for a simulation. From the + SimStat object all stats within the current gem5 simulation are present. + + Parameters + ---------- + root: Union[Root, List[Root]] + The root, or a list of Simobjects, of the simulation for translation to + a SimStat object. + + prepare_stats: bool + Dictates whether the stats are to be prepared prior to creating the + SimStat object. By default this is 'True'. + + Returns + ------- + SimStat + The SimStat Object of the current simulation. + + """ + stats_map = {} + creation_time = datetime.now() + time_converstion = None # TODO https://gem5.atlassian.net/browse/GEM5-846 + final_tick = Root.getInstance().resolveStat("finalTick").value + sim_ticks = Root.getInstance().resolveStat("simTicks").value + simulated_begin_time = int(final_tick - sim_ticks) + simulated_end_time = int(final_tick) + + if prepare_stats: + _m5.stats.processDumpQueue() + + for r in root: + if isinstance(r, Root): + if prepare_stats: + _prepare_stats(r) + for key in r.getStatGroups(): + stats_map[key] = get_stats_group(r.getStatGroups()[key]) + elif isinstance(r, SimObject): + if prepare_stats: + _prepare_stats(r) + stats_map[r.name] = get_stats_group(r) + else: + raise TypeError("Object (" + str(r) + ") passed is neither Root " + "nor SimObject. 
" + __name__ + " only processes " + "Roots, SimObjects, or a list of Roots and/or " + "SimObjects.") + + + + return SimStat( + creation_time=creation_time, + time_conversion=time_converstion, + simulated_begin_time=simulated_begin_time, + simulated_end_time=simulated_end_time, + **stats_map, + )