base-stats,python: Add Python Stats

This model is used to store and represent the "new" hierarchical stats
at the Python level. Over time these classes may be extended with
functions to ease in the analysis of gem5 stats. Though, for this
commit, such functions have been kept to a minimum.

`m5/pystats/loader.py` contains functions for translating the gem5 `_m5.stats`
statistics exposed via Pybind11 to the Python Stats model. For example:

```
import m5.pystats.gem5stats as gem5stats

simstat = gem5stats.get_simstat(root)
```

The Python Stats model container and statistic classes (`SimStat`, `Group`,
`Statistic`, and their subclasses) inherit from JsonSerializable, meaning
they can be translated to JSON. For example:

```
import m5.pystats.gem5stats as gem5stats

simstat = gem5stats.get_simstat(root)
with open('test.json', 'w') as f:
    simstat.dump(f)
```

The stats have also been exposed via the python statistics API. Via
command line, a JSON output may be specified with the argument
`--stats-file json://<file path>`.

Change-Id: I253a869f6b6d8c0de4dbed708892ee0cc33c5665
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38615
Reviewed-by: Jason Lowe-Power <power.jg@gmail.com>
Reviewed-by: Andreas Sandberg <andreas.sandberg@arm.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Bobby R. Bruce
2021-01-17 20:52:31 -08:00
parent bd6e1fc9c5
commit f11617736e
10 changed files with 945 additions and 4 deletions

View File

@@ -0,0 +1,41 @@
# Copyright (c) 2020 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from .jsonserializable import JsonSerializable
from .group import Group
from .simstat import SimStat
from .statistic import Statistic
from .storagetype import StorageType
from .timeconversion import TimeConversion
# Public API of the package: the names bound by the relative imports above
# that are exported by `from <package> import *`.
__all__ = [
"Group",
"SimStat",
"Statistic",
"TimeConversion",
"StorageType",
"JsonSerializable",
]

View File

@@ -0,0 +1,69 @@
# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import Dict, List, Optional, Union
from .jsonserializable import JsonSerializable
from .statistic import Scalar, Statistic
from .timeconversion import TimeConversion
class Group(JsonSerializable):
    """
    Used to create the hierarchical stats structure. A Group object contains a
    map of labeled Groups, Statistics, Lists of Groups, or Lists of Statistics.

    Parameters
    ----------
    type: Optional[str]
        Label stored in the `type` attribute. When None, "Group" is used.
    time_conversion: Optional[TimeConversion]
        Stored unchanged in the `time_conversion` attribute.
    **kwargs
        Every keyword argument becomes an attribute of the same name on the
        instance. Each value is expected to be a Group, a Statistic, a list
        of Groups, or a list of Statistics.
    """

    type: Optional[str]
    time_conversion: Optional[TimeConversion]

    # Note: the annotation on `**kwargs` describes each *value*, not the
    # mapping itself, so it must not be wrapped in `Dict[str, ...]`.
    def __init__(
        self,
        type: Optional[str] = None,
        time_conversion: Optional[TimeConversion] = None,
        **kwargs: Union["Group", Statistic, List["Group"], List[Statistic]]
    ):
        # `type` shadows the builtin, but it is part of the keyword interface
        # used by callers and subclasses, so the name is kept.
        self.type = "Group" if type is None else type
        self.time_conversion = time_conversion

        # Attributes are set in the order given; JsonSerializable.to_json()
        # walks `__dict__`, so this order is also the JSON key order.
        for key, value in kwargs.items():
            setattr(self, key, value)
class Vector(Group):
    """
    A Group specialisation used to store vector information.

    In gem5, Vectors in practice hold information that is closer to a
    dictionary of Scalar values, so each entry of `scalar_map` is exposed as
    an attribute of the resulting group. This class may change, and may be
    merged into Group, depending on the outcome of
    https://gem5.atlassian.net/browse/GEM5-867.

    Parameters
    ----------
    scalar_map: Dict[str, Scalar]
        Mapping of label to Scalar; forwarded to Group as keyword arguments.
    """

    def __init__(self, scalar_map: Dict[str,Scalar]):
        # The group is always tagged "Vector" and carries no time conversion.
        super().__init__(type="Vector", time_conversion=None, **scalar_map)

View File

@@ -0,0 +1,167 @@
# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from datetime import datetime
import json
from typing import Dict, List, Union, Any, IO
from .storagetype import StorageType
class JsonSerializable:
    """
    Classes which inherit from JsonSerializable can be translated into JSON
    using Python's json package.

    Usage
    -----
    ```
    import m5.pystats.gem5stats as gem5stats

    simstat = gem5stats.get_simstat(root)
    print(simstat.dumps())
    ```
    """

    def to_json(self) -> Dict:
        """
        Translates the current object into a JSON dictionary.

        Every instance attribute (every entry of `__dict__`) becomes a key of
        the returned dictionary, in attribute-creation order.

        Returns
        -------
        Dict
            The JSON dictionary.
        """
        return {
            key: self.__process_json_value(value)
            for key, value in self.__dict__.items()
        }

    def __process_json_value(self,
                             value: Any) -> Union[str,int,float,Dict,List,None]:
        """
        Translate values into a value which can be handled by the Python stdlib
        JSON package.

        Parameters
        ----------
        value: Any
            The value to be translated.

        Returns
        -------
        Union[str,int,float,Dict,List,None]
            A value which can be handled by the Python stdlib JSON package.
            Values of an unsupported type are translated to None.
        """
        if value is None:
            return None
        if isinstance(value, JsonSerializable):
            return value.to_json()
        if isinstance(value, (str, int, float)):
            return value
        if isinstance(value, datetime):
            # Sub-second precision is dropped from the ISO 8601 timestamp.
            return value.replace(microsecond=0).isoformat()
        if isinstance(value, (list, tuple)):
            return [self.__process_json_value(v) for v in value]
        if isinstance(value, dict):
            # Translate nested values recursively instead of dropping the
            # whole mapping. Keys are stringified as JSON object keys must be
            # strings.
            return {
                str(k): self.__process_json_value(v)
                for k, v in value.items()
            }
        if isinstance(value, StorageType):
            return str(value.name)

        # Deliberate best-effort: anything else is emitted as JSON null.
        return None

    def dumps(self, **kwargs) -> str:
        """
        This function mirrors the Python stdlib JSON module method
        `json.dumps`. It is used to obtain the gem5 statistics output to a
        JSON string.

        Parameters
        ----------
        kwargs: Dict[str, Any]
            Additional parameters to be passed to the `json.dumps` method.

        Returns
        -------
        str
            A string of the gem5 Statistics in a JSON format.

        Usage Example
        -------------
        ```
        import m5.pystats.gem5stats as gem5stats

        simstat = gem5stats.get_simstat(root)
        print(simstat.dumps(indent=6))
        ```

        The above will print the simulation statistic JSON string. The
        indentation will be 6 (by default the indentation is 4).
        """
        # Setting the default indentation to something readable. An explicit
        # `indent` from the caller (including None) is left untouched.
        kwargs.setdefault('indent', 4)
        return json.dumps(obj=self.to_json(), **kwargs)

    def dump(self, fp: IO[str], **kwargs) -> None:
        """
        This function mirrors the Python stdlib JSON module method
        `json.dump`. The JSON representation of this object is written to the
        specified text stream.

        Parameters
        ----------
        fp: IO[str]
            The Text IO stream to output the JSON to.
        **kwargs:
            Additional parameters to be passed to the ``json.dump`` method.

        Usage
        -----
        ```
        import m5.pystats.gem5stats as gem5stats

        simstat = gem5stats.get_simstat(root)
        with open("test.json", "w") as f:
            simstat.dump(fp=f, indent=6)
        ```

        The above will dump the json output to the 'test.json' file. The
        indentation will be of 6 (by default the indentation is 4).
        """
        # Setting the default indentation to something readable. An explicit
        # `indent` from the caller (including None) is left untouched.
        kwargs.setdefault('indent', 4)
        json.dump(obj=self.to_json(), fp=fp, **kwargs)

View File

@@ -0,0 +1,56 @@
# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from datetime import datetime
from typing import Dict, List, Optional, Union
from .jsonserializable import JsonSerializable
from .group import Group
from .statistic import Statistic
from .timeconversion import TimeConversion
class SimStat(JsonSerializable):
    """
    Contains all the statistics for a given simulation.

    Parameters
    ----------
    creation_time: Optional[datetime]
        Stored unchanged in the `creation_time` attribute.
    time_conversion: Optional[TimeConversion]
        Stored unchanged in the `time_conversion` attribute.
    simulated_begin_time: Optional[Union[int, float]]
        Stored unchanged in the `simulated_begin_time` attribute.
    simulated_end_time: Optional[Union[int, float]]
        Stored unchanged in the `simulated_end_time` attribute.
    **kwargs
        Every keyword argument becomes an attribute of the same name on the
        instance. Each value is expected to be a Group, a Statistic, or a
        list of Groups.
    """

    creation_time: Optional[datetime]
    time_conversion: Optional[TimeConversion]
    simulated_begin_time: Optional[Union[int, float]]
    simulated_end_time: Optional[Union[int, float]]

    # Note: the annotation on `**kwargs` describes each *value*, not the
    # mapping itself, so it must not be wrapped in `Dict[str, ...]`.
    def __init__(
        self,
        creation_time: Optional[datetime],
        time_conversion: Optional[TimeConversion],
        simulated_begin_time: Optional[Union[int, float]],
        simulated_end_time: Optional[Union[int, float]],
        **kwargs: Union[Group, Statistic, List[Group]]
    ):
        # Assignment order is significant: JsonSerializable.to_json() walks
        # `__dict__`, so this is also the order of the keys in the JSON.
        self.creation_time = creation_time
        self.time_conversion = time_conversion
        self.simulated_begin_time = simulated_begin_time
        self.simulated_end_time = simulated_end_time

        for key, value in kwargs.items():
            setattr(self, key, value)

View File

@@ -0,0 +1,206 @@
# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from abc import ABC
from typing import Any, Optional, Union, List
from .jsonserializable import JsonSerializable
from .storagetype import StorageType
class Statistic(ABC, JsonSerializable):
    """
    Common base class of all Python statistics.

    It derives from ABC but declares no abstract methods; it only records
    the five attributes shared by every statistic.

    Parameters
    ----------
    value: Any
        The payload of the statistic.
    type: Optional[str]
        Label naming the kind of statistic (subclasses pass their own name).
    unit: Optional[str]
        Stored unchanged in the `unit` attribute.
    description: Optional[str]
        Stored unchanged in the `description` attribute.
    datatype: Optional[StorageType]
        Stored unchanged in the `datatype` attribute.
    """

    value: Any
    type: Optional[str]
    unit: Optional[str]
    description: Optional[str]
    datatype: Optional[StorageType]

    def __init__(
        self,
        value: Any,
        type: Optional[str] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
        datatype: Optional[StorageType] = None
    ):
        # Tuple assignment binds the targets left to right, so the attributes
        # are created in the order value, type, unit, description, datatype.
        self.value, self.type = value, type
        self.unit, self.description = unit, description
        self.datatype = datatype
class Scalar(Statistic):
    """
    A Python statistic holding a single scalar value.

    The `type` attribute is always set to "Scalar"; all other arguments are
    forwarded unchanged to Statistic.
    """

    value: Union[float, int]

    def __init__(
        self,
        value: Any,
        unit: Optional[str] = None,
        description: Optional[str] = None,
        datatype: Optional[StorageType] = None
    ):
        forwarded = dict(
            value=value,
            type="Scalar",
            unit=unit,
            description=description,
            datatype=datatype,
        )
        super().__init__(**forwarded)
class BaseScalarVector(Statistic):
    """
    An abstract base class for classes containing a vector of Scalar values.

    Parameters
    ----------
    value: List[Union[int,float]]
        The vector of values.
    type: Optional[str]
        Label naming the kind of statistic (subclasses pass their own name).
    unit: Optional[str]
        Forwarded unchanged to Statistic.
    description: Optional[str]
        Forwarded unchanged to Statistic.
    datatype: Optional[StorageType]
        Forwarded unchanged to Statistic.
    """

    value: List[Union[int,float]]

    def __init__(self, value: List[Union[int,float]],
                 type: Optional[str] = None,
                 unit: Optional[str] = None,
                 description: Optional[str] = None,
                 datatype: Optional[StorageType] = None):
        super().__init__(
            value=value,
            type=type,
            unit=unit,
            description=description,
            datatype=datatype,
        )

    def mean(self) -> float:
        """
        Returns the mean of the value vector.

        Returns
        -------
        float
            The mean value across all bins.
        """
        # Identity comparison: `!= None` would invoke the element type's
        # `__ne__`, which is not what is being checked here.
        assert self.value is not None
        # Check against the builtin `list`; `typing.List` is an annotation
        # helper and is not meant for runtime isinstance checks.
        assert isinstance(self.value, list)

        from statistics import mean as statistics_mean
        return statistics_mean(self.value)

    def count(self) -> Union[int, float]:
        """
        Returns the count across all the bins.

        Returns
        -------
        Union[int, float]
            The sum of all bin values (an int when every bin value is an int).
        """
        assert self.value is not None
        assert isinstance(self.value, list)
        return sum(self.value)
class Distribution(BaseScalarVector):
    """
    A statistic type that stores information relating to distributions.

    A distribution has a `min` and a `max` and a number of bins (>=1),
    presumably dividing the range between the two. `value` holds the value
    of each bin, e.g. `value[3]` is the value of the 4th bin. It is assumed
    each bin is of equal size (`bin_size`).

    The `type` attribute is always set to "Distribution". The constructor
    asserts that `bin_size >= 0` and `num_bins >= 1`.
    """

    value: List[int]
    min: Union[float, int]
    max: Union[float, int]
    num_bins: int
    bin_size: Union[float, int]
    sum: Optional[int]
    sum_squared: Optional[int]
    underflow: Optional[int]
    overflow: Optional[int]
    logs: Optional[float]

    def __init__(
        self,
        value: List[int],
        min: Union[float, int],
        max: Union[float, int],
        num_bins: int,
        bin_size: Union[float, int],
        sum: Optional[int] = None,
        sum_squared: Optional[int] = None,
        underflow: Optional[int] = None,
        overflow: Optional[int] = None,
        logs: Optional[float] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
        datatype: Optional[StorageType] = None
    ):
        super().__init__(
            value=value,
            type="Distribution",
            unit=unit,
            description=description,
            datatype=datatype,
        )

        # The attribute creation order below is deliberate and must not be
        # rearranged: it determines the key order of the JSON output.
        # Tuple assignment binds its targets left to right.
        self.min, self.max = min, max
        self.num_bins, self.bin_size = num_bins, bin_size
        self.sum = sum
        self.underflow, self.overflow = underflow, overflow
        self.logs = logs
        self.sum_squared = sum_squared

        # Sanity checks on the basic shape of a distribution.
        assert(self.bin_size >= 0)
        assert(self.num_bins >= 1)
class Accumulator(BaseScalarVector):
    """
    A statistical type representing an accumulator.

    The `type` attribute is always set to "Accumulator".

    NOTE(review): the `count` instance attribute assigned in the constructor
    hides the `count()` method inherited from BaseScalarVector, so on an
    Accumulator `count` is the stored integer and is not callable.
    """

    count: int
    min: Union[int, float]
    max: Union[int, float]
    sum_squared: Optional[int]

    def __init__(
        self,
        value: List[Union[int,float]],
        count: int,
        min: Union[int, float],
        max: Union[int, float],
        sum_squared: Optional[int] = None,
        unit: Optional[str] = None,
        description: Optional[str] = None,
        datatype: Optional[StorageType] = None
    ):
        super().__init__(
            value=value,
            type="Accumulator",
            unit=unit,
            description=description,
            datatype=datatype,
        )

        # Created in this order (count, min, max, sum_squared) because the
        # JSON output lists attributes in creation order.
        self.count = count
        self.min, self.max = min, max
        self.sum_squared = sum_squared

View File

@@ -0,0 +1,42 @@
# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from enum import Enum
from typing import Dict
class StorageType(Enum):
    """
    An enum used to declare what C++ data type was used to store a value.
    32 or 64 bits; signed integer (s), unsigned integer (u), or float (f).

    E.g. 's64' indicates a 64 bit signed integer
    """
    # Enum members are left unannotated: inside the class body each name is
    # bound to a StorageType member, not to a `str`, so a `: str` annotation
    # is incorrect and is rejected by static type checkers.
    u32 = "u32"
    u64 = "u64"
    s32 = "s32"
    s64 = "s64"
    f32 = "f32"
    f64 = "f64"

View File

@@ -0,0 +1,39 @@
# Copyright (c) 2021 The Regents of The University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from typing import Optional
class TimeConversion:
    """
    Describes the scale factor needed to translate a simulation time
    measurement (e.g. ticks) into seconds.

    NOTE(review): this class has no base class, so it is not a
    JsonSerializable; JsonSerializable emits values of unsupported types as
    JSON null.

    Parameters
    ----------
    scale_factor: float
        The scale factor; stored unchanged.
    description: Optional[str]
        Optional free-form description; stored unchanged.
    """

    scale_factor: float
    description: Optional[str]

    def __init__(self, scale_factor: float, description: Optional[str] = None):
        self.scale_factor, self.description = scale_factor, description