stdlib: Refactor Looppoint

This change refactors the Looppoint files. While functionally
equivalent, these classes have been moved and altered to be easier to
handle going forward. The following changes have been made:

- New classes have been added to represent the data structure of the
  Looppoint JSON. This simplifies the parsing of JSON files and makes it
  easier to handle Looppoint data structures. Ultimately this is hidden
  from the user via the new 'gem5.resources.looppoint.Looppoint' class,
  which will be the front-facing class for Looppoint interactions.
- The `LoopPointCheckpoint` class has been replaced with
  `LooppointCsvLoader`. This new class takes in a CSV pinpoints file
  to load the necessary Looppoint data.
- The `LoopPointRestore` class has been replaced by
  `LooppointJsonLoader`.
- All Looppoint classes have been moved to `gem5.resources`. This will
  make it easier when we add Looppoints as specific gem5 resources.

Change-Id: I11dd1fe8f76658db220320584270d57cb37a3c62
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67611
Tested-by: kokoro <noreply+kokoro@google.com>
Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu>
Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
This commit is contained in:
Bobby R. Bruce
2023-02-05 20:20:30 +00:00
committed by Bobby Bruce
parent f59d860e51
commit aae3430281
7 changed files with 564 additions and 406 deletions

View File

@@ -61,7 +61,7 @@ from pathlib import Path
from gem5.simulate.exit_event_generators import (
looppoint_save_checkpoint_generator,
)
from gem5.utils.looppoint import LoopPointCheckpoint
from gem5.resources.looppoint import LooppointCsvLoader
import argparse
requires(isa_required=ISA.X86)
@@ -103,16 +103,13 @@ processor = SimpleProcessor(
num_cores=9,
)
looppoint = LoopPointCheckpoint(
looppoint = LooppointCsvLoader(
# Pass in the LoopPoint data file
looppoint_file=Path(
obtain_resource(
"x86-matrix-multiply-omp-100-8-global-pinpoints"
).get_local_path()
),
# True if the LoopPoint data file is a csv generated by Pin.
# False if it is a JSON file generated by the gem5 simulator.
if_csv=True,
)
)
board = SimpleBoard(

View File

@@ -55,7 +55,7 @@ from gem5.components.processors.cpu_types import CPUTypes
from gem5.isas import ISA
from gem5.resources.resource import obtain_resource
from pathlib import Path
from gem5.utils.looppoint import LoopPointRestore
from gem5.utils.resource import LooppointJsonLoader
from m5.stats import reset, dump
requires(isa_required=ISA.X86)
@@ -113,7 +113,7 @@ board = SimpleBoard(
cache_hierarchy=cache_hierarchy,
)
looppoint = LoopPointRestore(
looppoint = LooppointJsonLoader(
looppoint_file=Path(
obtain_resource(
"x86-matrix-multiply-omp-100-8-looppoint"

View File

@@ -240,7 +240,6 @@ PySource('gem5.components.processors',
PySource('gem5.components.processors',
'gem5/components/processors/switchable_processor.py')
PySource('gem5.utils', 'gem5/utils/simpoint.py')
PySource('gem5.utils', 'gem5/utils/looppoint.py')
PySource('gem5.components.processors',
'gem5/components/processors/traffic_generator_core.py')
PySource('gem5.components.processors',
@@ -263,6 +262,7 @@ PySource('gem5.resources', 'gem5/resources/downloader.py')
PySource('gem5.resources', 'gem5/resources/md5_utils.py')
PySource('gem5.resources', 'gem5/resources/resource.py')
PySource('gem5.resources', 'gem5/resources/workload.py')
PySource('gem5.resources', 'gem5/resources/looppoint.py')
PySource('gem5.utils', 'gem5/utils/__init__.py')
PySource('gem5.utils', 'gem5/utils/filelock.py')
PySource('gem5.utils', 'gem5/utils/override.py')

View File

@@ -35,7 +35,7 @@ from ...resources.resource import (
SimpointDirectoryResource,
)
from gem5.utils.looppoint import LoopPoint
from gem5.resources.looppoint import Looppoint
from m5.objects import SEWorkload, Process
@@ -177,9 +177,10 @@ class SEBinaryWorkload:
def set_se_looppoint_workload(
self,
binary: AbstractResource,
looppoint: Looppoint,
arguments: List[str] = [],
looppoint: Optional[Union[AbstractResource, LoopPoint]] = None,
checkpoint: Optional[Union[Path, AbstractResource]] = None,
region_id: Optional[Union[int, str]] = None,
) -> None:
"""Set up the system to run a LoopPoint workload.
@@ -188,18 +189,18 @@ class SEBinaryWorkload:
ISA and the simulated ISA are the same.
:param binary: The resource encapsulating the binary to be run.
:param arguments: The input arguments for the binary
:param looppoint: The LoopPoint object that contain all the information
gather from the LoopPoint files and a LoopPointManager that will raise
exit events for LoopPoints
:param arguments: The input arguments for the binary
:param region_id: If set, will only load the Looppoint region
corresponding to that ID.
"""
if isinstance(looppoint, AbstractResource):
self._looppoint_object = LoopPoint(looppoint)
else:
assert isinstance(looppoint, LoopPoint)
self._looppoint_object = looppoint
assert isinstance(looppoint, Looppoint)
self._looppoint_object = looppoint
if region_id:
self._looppoint_object.set_target_region_id(region_id=region_id)
self._looppoint_object.setup_processor(self.get_processor())
# Call set_se_binary_workload after LoopPoint setup is complete
@@ -209,7 +210,7 @@ class SEBinaryWorkload:
checkpoint=checkpoint,
)
def get_looppoint(self) -> LoopPoint:
def get_looppoint(self) -> Looppoint:
"""
Returns the LoopPoint object set. If no LoopPoint object has been set
an exception is thrown.

View File

@@ -0,0 +1,544 @@
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.params import PcCountPair
from m5.objects import PcCountTrackerManager
import m5
import os
import csv
import json
from pathlib import Path
from typing import List, Optional, Dict, Union
class LooppointRegionPC:
    """A data structure for storing a Looppoint region's PC information.

    **Note**: This is not intended to be a user-facing class. The classes
    `LooppointJsonLoader` and `LooppointCsvLoader` can be used to load
    and restore Looppoint data.
    """

    def __init__(self, pc: int, globl: int, relative: Optional[int] = None):
        """
        :param pc: The Program Counter value of this region.
        :param globl: The global value of this region. It is used as the
        count when a PcCountPair is built for this region PC.
        :param relative: The relative program counter value. Optional.
        """
        self._pc = pc
        self._global = globl
        self._relative = relative

    def get_pc(self) -> int:
        """Returns the Program counter value."""
        return self._pc

    def get_global(self) -> int:
        """Returns the global value."""
        return self._global

    def get_relative(self) -> Optional[int]:
        """If specified, returns the relative Program counter value, otherwise
        returns None."""
        return self._relative

    def get_pc_count_pair(self) -> PcCountPair:
        """Returns the PcCountPair (PC, global value) for this Region PC."""
        return PcCountPair(self.get_pc(), self.get_global())

    def update_relative_count(self, manager: PcCountTrackerManager) -> None:
        """Updates the relative count to the global value minus the count
        the manager currently reports for this PC.

        :param manager: The PcCountTrackerManager queried via `getPcCount`.
        """
        self._relative = int(
            self.get_global() - manager.getPcCount(self.get_pc())
        )

    def to_json(self) -> Dict[str, int]:
        """Returns this class in a JSON structure which can then be serialized
        and later be restored from. The "relative" key is only present when
        a relative value has been set."""
        to_return = {
            "pc": self.get_pc(),
            "global": self.get_global(),
        }
        # Compare against None explicitly: a relative count of 0 is a valid
        # value and must not be dropped from the serialized output.
        if self._relative is not None:
            to_return["relative"] = self.get_relative()
        return to_return
class LooppointRegionWarmup:
    """Holds the warmup interval of a Looppoint region as a pair of
    PcCountPairs (start and end).

    **Note**: This is not intended to be a user-facing class. The loader
    classes in this module are the intended way to create and restore
    Looppoint data.
    """

    def __init__(self, start: PcCountPair, end: PcCountPair):
        """
        :param start: The PcCountPair at which the warmup begins.
        :param end: The PcCountPair at which the warmup finishes.
        """
        self._start, self._end = start, end

    def get_start(self) -> PcCountPair:
        """Returns the PcCountPair marking the beginning of the warmup."""
        return self._start

    def get_end(self) -> PcCountPair:
        """Returns the PcCountPair marking the end of the warmup."""
        return self._end

    def get_pc_count_pairs(self) -> List[PcCountPair]:
        """Returns a new list holding the start pair followed by the end
        pair."""
        first = self.get_start()
        last = self.get_end()
        return [first, last]

    def to_json(self) -> Dict[str, Dict[str, int]]:
        """Returns a serializable dictionary with a "start" and an "end"
        entry, each carrying the "pc" and "count" of the respective pair."""
        first = self.get_start()
        last = self.get_end()
        serialized = {}
        serialized["start"] = {"pc": first.pc, "count": first.count}
        serialized["end"] = {"pc": last.pc, "count": last.count}
        return serialized
class LooppointSimulation:
    """Holds the start and end LooppointRegionPC of a simulation region.

    **Note**: This is not intended to be a user-facing class. The loader
    classes in this module are the intended way to create and restore
    Looppoint data.
    """

    def __init__(self, start: LooppointRegionPC, end: LooppointRegionPC):
        """
        :param start: The LooppointRegionPC the simulation region begins at.
        :param end: The LooppointRegionPC the simulation region finishes at.
        """
        self._start, self._end = start, end

    def get_start(self) -> LooppointRegionPC:
        """Returns the LooppointRegionPC for the start of the region."""
        return self._start

    def get_end(self) -> LooppointRegionPC:
        """Returns the LooppointRegionPC for the end of the region."""
        return self._end

    def get_pc_count_pairs(self) -> List[PcCountPair]:
        """Returns a new list with the PcCountPair of the start followed by
        the PcCountPair of the end."""
        return [
            region_pc.get_pc_count_pair()
            for region_pc in (self.get_start(), self.get_end())
        ]

    def update_relatives_counts(
        self, manager: PcCountTrackerManager, include_start: bool = False
    ) -> None:
        """Refreshes the relative count of the end of the region and, when
        `include_start` is set, of the start as well (start first).

        :param manager: The manager handed to each `update_relative_count`.
        :param include_start: Set by the caller when the region has a warmup
        interval, in which case the start needs a relative count too.
        """
        if include_start:
            endpoints = (self.get_start(), self.get_end())
        else:
            endpoints = (self.get_end(),)
        for endpoint in endpoints:
            endpoint.update_relative_count(manager=manager)

    def to_json(self) -> Dict:
        """Returns a serializable dictionary holding the JSON form of the
        start and end under the keys "start" and "end"."""
        serialized = {}
        serialized["start"] = self.get_start().to_json()
        serialized["end"] = self.get_end().to_json()
        return serialized
class LooppointRegion:
    """Bundles everything known about one Looppoint region: its simulation
    interval, its multiplier and, optionally, its warmup interval.

    **Note**: This is not intended to be a user-facing class. The loader
    classes in this module are the intended way to create and restore
    Looppoint data.
    """

    def __init__(
        self,
        simulation: LooppointSimulation,
        multiplier: float,
        warmup: Optional[LooppointRegionWarmup] = None,
    ):
        """
        :param simulation: The simulation interval of this region.
        :param multiplier: The multiplier of this region.
        :param warmup: The warmup interval of this region. Optional.
        """
        self._simulation = simulation
        self._multiplier = multiplier
        self._warmup = warmup

    def get_simulation(self) -> LooppointSimulation:
        """Returns the simulation interval of this region."""
        return self._simulation

    def get_multiplier(self) -> float:
        """Returns the multiplier of this region."""
        return self._multiplier

    def get_warmup(self) -> Optional[LooppointRegionWarmup]:
        """Returns the warmup interval, or None when there is none."""
        return self._warmup

    def get_pc_count_pairs(self) -> List[PcCountPair]:
        """Returns the simulation PcCountPairs, followed by the warmup
        PcCountPairs when a warmup interval exists."""
        pairs = self.get_simulation().get_pc_count_pairs()
        if not self.get_warmup():
            return pairs
        pairs.extend(self.get_warmup().get_pc_count_pairs())
        return pairs

    def update_relatives_counts(self, manager: PcCountTrackerManager) -> None:
        """Refreshes the relative counts of the simulation interval. The
        start is only included when this region has a warmup interval."""
        has_warmup = bool(self.get_warmup())
        self.get_simulation().update_relatives_counts(
            manager=manager, include_start=has_warmup
        )

    def get_start(self) -> PcCountPair:
        """Returns the PcCountPair this region begins with: the warmup start
        when a warmup exists, otherwise the simulation start."""
        if not self.get_warmup():
            return self.get_simulation().get_start().get_pc_count_pair()
        return self.get_warmup().get_start()

    def to_json(self) -> Dict:
        """Returns a serializable dictionary with the "simulation" and
        "multiplier" entries, plus "warmup" when a warmup exists."""
        serialized = {}
        serialized["simulation"] = self.get_simulation().to_json()
        serialized["multiplier"] = self.get_multiplier()
        if self.get_warmup():
            serialized["warmup"] = self.get_warmup().to_json()
        return serialized
class Looppoint:
    """Stores all the Looppoint information for a gem5 workload."""

    def __init__(self, regions: Dict[Union[str, int], LooppointRegion]):
        """
        :param regions: A dictionary mapping the region_ids with the
        LooppointRegions.
        """
        self._regions = regions
        self._manager = PcCountTrackerManager()
        self._manager.targets = self.get_targets()

    def set_target_region_id(self, region_id: Union[str, int]) -> None:
        """There are use-cases where we want to obtain a looppoint data
        structure containing a single target region via its ID. This function
        will remove all irrelevant regions from the regions dictionary (in
        place) and refresh the manager's targets.

        :param region_id: The ID of the only region to keep.
        :raises Exception: If `region_id` is not a known region ID.
        """
        if region_id not in self._regions:
            raise Exception(f"Region ID '{region_id}' cannot be found.")

        # Compare by value, not identity: two equal IDs (e.g. an int parsed
        # from a file and one passed by the caller) need not be the same
        # object, and an identity test would then delete the target as well.
        to_remove = [rid for rid in self._regions if rid != region_id]
        for rid in to_remove:
            del self._regions[rid]

        self._manager.targets = self.get_targets()

    def get_manager(self) -> PcCountTrackerManager:
        """Returns the PcCountTrackerManager for this Looppoint data
        structure."""
        return self._manager

    def get_regions(self) -> Dict[Union[int, str], LooppointRegion]:
        """Returns the regions for this Looppoint data structure."""
        return self._regions

    def setup_processor(
        self,
        processor: "AbstractProcessor",
    ) -> None:
        """
        A function is used to setup a PC tracker in all the cores and
        connect all the tracker to the PC tracker manager to perform
        multithread PC tracking.

        :param processor: The processor used in the simulation configuration.
        """
        for core in processor.get_cores():
            core.add_pc_tracker_probe(self.get_targets(), self.get_manager())

    def update_relatives_counts(self) -> None:
        """
        Updates the relative count for restore usage. The new relative count
        will be stored in relevant data structures. Nothing happens when the
        current PC Count pair is not the start of a region.
        """
        region_id = self.get_current_region()
        if region_id is not None:
            self.get_regions()[region_id].update_relatives_counts(
                manager=self.get_manager()
            )

    def get_current_region(self) -> Optional[Union[str, int]]:
        """Returns the region id if the current PC Count pair is significant
        (e.g. beginning of the checkpoint), otherwise, it returns None to
        indicate the current PC Count pair is not significant.
        """
        current_pair = self.get_current_pair()
        region_start_map = self.get_region_start_id_map()
        return region_start_map.get(current_pair)

    def get_current_pair(self) -> PcCountPair:
        """This function returns the current PC Count pair."""
        return self.get_manager().getCurrentPcCountPair()

    def get_region_start_id_map(self) -> Dict[PcCountPair, Union[int, str]]:
        """Returns the starting PcCountPairs mapped to the corresponding region
        IDs. This is a helper function for quick mapping of PcCountPairs to
        region IDs."""
        return {
            region.get_start(): rid
            for rid, region in self.get_regions().items()
        }

    def get_targets(self) -> List[PcCountPair]:
        """Returns the complete list of target PcCountPairs. That is, the
        PcCountPairs of each region's simulation interval as well as the
        relevant warmup intervals."""
        targets = []
        for region in self.get_regions().values():
            targets.extend(region.get_pc_count_pairs())
        return targets

    def to_json(self) -> Dict[Union[int, str], Dict]:
        """Returns this data-structure as a dictionary for serialization via
        the `output_json_file` function."""
        return {
            region_id: region.to_json()
            for region_id, region in self.get_regions().items()
        }

    def output_json_file(
        self,
        input_indent: int = 4,
        filepath: str = os.path.join(m5.options.outdir, "looppoint.json"),
    ) -> None:
        """
        Writes the dictionary returned by `to_json` to a JSON file.

        :param input_indent: the indent value of the json file
        :param filepath: the path of the output json file. Note the default
        is computed once, when this module is loaded.
        """
        with open(filepath, "w") as file:
            json.dump(self.to_json(), file, indent=input_indent)
class LooppointCsvLoader(Looppoint):
    """This class will create a Looppoint data structure from data extracted
    from a Looppoint pinpoints file."""

    def __init__(
        self,
        pinpoints_file: Union[Path, str],
        region_id: Optional[Union[str, int]] = None,
    ):
        """
        :param pinpoints_file: The pinpoints file in which the data is to be
        expected.
        :param region_id: If set, will only load the specified region data.
        Otherwise, all region info is loaded. Is used when restoring to a
        particular region.
        :raises Exception: If a warmup row refers to a region ID for which no
        simulation region row exists.
        """

        regions = {}
        warmups = {}

        _path = Path(pinpoints_file)

        # This section is hard-coded to parse the data in the csv file.
        # The csv file is assumed to have a constant format.
        with open(_path, newline="") as csvfile:
            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
            for row in reader:
                if len(row) <= 1:
                    continue
                if row[0] == "cluster":
                    # if it is a simulation region
                    line = row[4].split(",")
                    rid = int(line[2])
                    region_start = LooppointRegionPC(
                        pc=int(line[3], 16),
                        globl=int(line[6]),
                        # From the CSV's I've observed, the start relative
                        # value is never set, while the end is always set.
                        # Given limited information, I can only determine
                        # this is a rule of how the CSV is setup.
                        relative=None,
                    )
                    region_end = LooppointRegionPC(
                        pc=int(line[7], 16),
                        globl=int(line[10]),
                        relative=int(line[11]),
                    )
                    simulation = LooppointSimulation(
                        start=region_start, end=region_end
                    )
                    multiplier = float(line[14])
                    regions[rid] = LooppointRegion(
                        simulation=simulation, multiplier=multiplier
                    )
                elif row[0] == "Warmup":
                    line = row[3].split(",")
                    rid = int(line[0])
                    start = PcCountPair(int(line[3], 16), int(line[6]))
                    end = PcCountPair(int(line[7], 16), int(line[10]))
                    warmups[rid] = LooppointRegionWarmup(start=start, end=end)

        # Warmups are attached only after the whole file has been read, so a
        # warmup row may appear before or after its simulation region row.
        for rid, warmup in warmups.items():
            if rid not in regions:
                raise Exception(
                    f"Warmup region ID '{rid}' does not have a "
                    "corresponding region."
                )
            regions[rid]._warmup = warmup

        super().__init__(regions=regions)

        # Test against None so that a region ID of 0 is honoured rather than
        # being treated as "no region requested".
        if region_id is not None:
            self.set_target_region_id(region_id=region_id)
class LooppointJsonLoader(Looppoint):
    """This class will create a Looppoint data structure from data
    extracted from a Looppoint json file."""

    def __init__(
        self,
        looppoint_file: Union[str, Path],
        region_id: Optional[Union[str, int]] = None,
    ) -> None:
        """
        :param looppoint_file: a json file generated by gem5 that has all the
        LoopPoint data information
        :param region_id: If set, will only load the specified region data.
        Otherwise, all region info is loaded. Is used when restoring to a
        particular region. Region IDs read from JSON are strings; an ID
        passed in as an int is matched against its string form.
        """

        _path = Path(looppoint_file)

        with open(_path) as file:
            json_contents = json.load(file)

        regions = {}
        for rid, contents in json_contents.items():
            simulation = LooppointSimulation(
                start=self._parse_region_pc(contents["simulation"]["start"]),
                end=self._parse_region_pc(contents["simulation"]["end"]),
            )
            multiplier = float(contents["multiplier"])
            warmup = None
            if "warmup" in contents:
                warmup = LooppointRegionWarmup(
                    start=self._parse_warmup_pair(
                        contents["warmup"]["start"]
                    ),
                    end=self._parse_warmup_pair(contents["warmup"]["end"]),
                )

            regions[rid] = LooppointRegion(
                simulation=simulation, multiplier=multiplier, warmup=warmup
            )

        super().__init__(regions=regions)

        # Test against None so that a region ID of 0 is honoured rather than
        # being treated as "no region requested".
        if region_id is not None:
            # JSON object keys are always loaded as strings, so also accept
            # the string form of the requested ID. The dictionary's own key
            # object is handed on, which keeps the selection correct whether
            # IDs are later compared by equality or by identity.
            wanted = (region_id, str(region_id))
            target = next(
                (rid for rid in regions if rid in wanted), region_id
            )
            self.set_target_region_id(region_id=target)

    @staticmethod
    def _parse_region_pc(entry: Dict) -> LooppointRegionPC:
        """Builds a LooppointRegionPC from a JSON "start"/"end" entry of a
        simulation region. The "relative" key is optional."""
        relative = int(entry["relative"]) if "relative" in entry else None
        return LooppointRegionPC(
            pc=int(entry["pc"]),
            globl=int(entry["global"]),
            relative=relative,
        )

    @staticmethod
    def _parse_warmup_pair(entry: Dict) -> PcCountPair:
        """Builds a PcCountPair from a JSON "start"/"end" entry of a warmup
        interval."""
        return PcCountPair(entry["pc"], entry["count"])

View File

@@ -29,7 +29,7 @@ import m5.stats
from ..components.processors.abstract_processor import AbstractProcessor
from ..components.processors.switchable_processor import SwitchableProcessor
from ..resources.resource import SimpointResource
from gem5.utils.looppoint import LoopPoint
from gem5.resources.looppoint import Looppoint
from m5.util import warn
from pathlib import Path
@@ -172,7 +172,7 @@ def simpoints_save_checkpoint_generator(
def looppoint_save_checkpoint_generator(
checkpoint_dir: Path,
looppoint: LoopPoint,
looppoint: Looppoint,
update_relatives: bool = True,
exit_when_empty: bool = True,
):
@@ -203,7 +203,7 @@ def looppoint_save_checkpoint_generator(
# will return an integer greater than 0. By significant PC Count pair,
# it means the PC Count pair that indicates where to take the
# checkpoint at. This is determined in the LoopPoint module.
if region != -1:
if region:
if update_relatives:
looppoint.update_relatives_counts()
m5.checkpoint((checkpoint_dir / f"cpt.Region{region}").as_posix())

View File

@@ -1,384 +0,0 @@
# Copyright (c) 2022 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from m5.util import fatal
from m5.params import PcCountPair
from pathlib import Path
from typing import List, Dict, Tuple
from gem5.components.processors.abstract_processor import AbstractProcessor
from m5.objects import PcCountTrackerManager
import csv
import re
import json
import m5
import os
class LoopPoint:
    """
    Manages the information a workload needs for LoopPoint: the target
    PcCountPairs, the mapping from significant PcCountPairs to region ids,
    the LoopPoint data dictionary and a PcCountTrackerManager.
    """

    def __init__(
        self,
        targets: List[PcCountPair],
        regions: Dict[PcCountPair, int],
        json_file: Dict[int, Dict],
    ) -> None:
        """
        :param targets: the PcCountPairs handed to the PcCountTrackerManager
        as its targets
        :param regions: maps each significant PcCountPair to its region id.
        It is consulted to decide whether the current PcCountPair is one at
        which relative counts are updated
        :param json_file: the LoopPoint data (including relative counts and
        multiplier); it can be written out as a json file
        """
        manager = PcCountTrackerManager()
        manager.targets = targets
        self._manager = manager
        self._targets = targets
        self._regions = regions
        self._json_file = json_file

    def setup_processor(
        self,
        processor: AbstractProcessor,
    ) -> None:
        """
        Adds a PC tracker probe, connected to this object's manager, to every
        core of the processor.

        :param processor: the processor used in the simulation configuration
        """
        for cpu_core in processor.get_cores():
            cpu_core.add_pc_tracker_probe(self._targets, self._manager)

    def update_relatives_counts(self) -> None:
        """
        Stores fresh relative counts in the LoopPoint data when the current
        PcCountPair is a significant one. The end of the simulation region is
        always updated; the start only when the region has a warmup interval.
        """
        pair = self._manager.getCurrentPcCountPair()
        if pair not in self._regions:
            return

        entry = self._json_file[self._regions[pair]]
        simulation = entry["simulation"]
        # The start only needs a relative count when a warmup precedes it.
        boundaries = ("start", "end") if "warmup" in entry else ("end",)
        for boundary_name in boundaries:
            boundary = simulation[boundary_name]
            executed = self._manager.getPcCount(boundary["pc"])
            boundary["relative"] = int(boundary["global"] - executed)

    def output_json_file(
        self,
        input_indent: int = 4,
        filepath: str = os.path.join(m5.options.outdir, "outdir.json"),
    ) -> Dict[int, Dict]:
        """
        Writes the LoopPoint data dictionary to a json file.

        :param input_indent: the indent value of the json file
        :param filepath: the path of the output file
        """
        with open(filepath, "w") as out:
            json.dump(self._json_file, out, indent=input_indent)

    def get_current_region(self) -> int:
        """
        Returns the region id of the current PcCountPair when that pair is
        significant, otherwise -1.
        """
        pair = self._manager.getCurrentPcCountPair()
        return self._regions[pair] if pair in self._regions else -1

    def get_current_pair(self) -> PcCountPair:
        """
        Returns the current PcCountPair reported by the manager.
        """
        return self._manager.getCurrentPcCountPair()

    def get_regions(self) -> Dict[PcCountPair, int]:
        """
        Returns the dictionary mapping significant PcCountPairs to region ids.
        """
        return self._regions

    def get_targets(self) -> List[PcCountPair]:
        """
        Returns the list of target PcCountPairs.
        """
        return self._targets
class LoopPointCheckpoint(LoopPoint):
    def __init__(self, looppoint_file: Path, if_csv: bool) -> None:
        """
        This class is specifically designed to take in the LoopPoint data file
        and generate the information needed to take checkpoints for LoopPoint
        regions(warmup region+simulation region)

        :param looppoint_file: the path of the LoopPoint data file
        :param if_csv: if the file is a csv file, then it is True. If the file
        is a json file, then it is False
        """

        _json_file = {}
        _targets = []
        _region_id = {}

        # Both profile functions fill the three containers in place.
        if if_csv:
            self.profile_csv(looppoint_file, _targets, _json_file, _region_id)
        else:
            self.profile_json(looppoint_file, _targets, _json_file, _region_id)

        super().__init__(
            _targets,
            _region_id,
            _json_file,
        )

    def profile_csv(
        self,
        looppoint_file_path: Path,
        targets: List[PcCountPair],
        json_file: Dict[int, Dict],
        region_id: Dict[PcCountPair, int],
    ) -> None:
        """
        This function profiles the csv LoopPoint data file into three variables
        to take correct checkpoints for LoopPoint. All three containers are
        filled in place.

        :param looppoint_file_path: the path of the LoopPoint data file
        :param targets: a list of PcCountPair
        :param json_file: a dictionary for all the LoopPoint data
        :param region_id: a dictionary for all the significant PcCountPair and
        its corresponding region id
        """

        # This section is hard-coded to parse the data in the csv file.
        # The csv file is assumed to have a constant format.
        with open(looppoint_file_path, newline="") as csvfile:
            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
            for row in reader:
                if len(row) <= 1:
                    continue
                if row[0] == "cluster":
                    # if it is a simulation region
                    line = row[4].split(",")
                    rid = int(line[2])
                    start = PcCountPair(int(line[3], 16), int(line[6]))
                    end = PcCountPair(int(line[7], 16), int(line[10]))
                    # The region may already exist if its warmup row was
                    # read first; only the "simulation" entry is replaced.
                    region = json_file.setdefault(rid, {})
                    region["simulation"] = {
                        "start": {
                            "pc": int(line[3], 16),
                            "global": int(line[6]),
                        },
                        "end": {
                            "pc": int(line[7], 16),
                            "global": int(line[10]),
                        },
                    }
                    region["multiplier"] = float(line[14])
                    # store all the PC Count pairs from the file to the
                    # targets list
                    targets.append(start)
                    targets.append(end)
                elif row[0] == "Warmup":
                    line = row[3].split(",")
                    rid = int(line[0])
                    start = PcCountPair(int(line[3], 16), int(line[6]))
                    end = PcCountPair(int(line[7], 16), int(line[10]))
                    region = json_file.setdefault(rid, {})
                    region["warmup"] = {
                        "start": {
                            "pc": int(line[3], 16),
                            "count": int(line[6]),
                        },
                        "end": {
                            "pc": int(line[7], 16),
                            "count": int(line[10]),
                        },
                    }
                    # store all the PC Count pairs from the file to the
                    # targets list
                    targets.append(start)
                    targets.append(end)

        for rid, region in json_file.items():
            # this loop iterates all the regions and find the significant PC
            # Count pair for the region
            if "warmup" in region:
                # if the region has a warmup interval, then the checkpoint
                # should be taken at the start of the warmup interval
                start = PcCountPair(
                    region["warmup"]["start"]["pc"],
                    region["warmup"]["start"]["count"],
                )
            else:
                # if the region does not have a warmup interval, then the
                # checkpoint should be taken at the start of the simulation
                # region
                start = PcCountPair(
                    region["simulation"]["start"]["pc"],
                    region["simulation"]["start"]["global"],
                )
            region_id[start] = rid

    def profile_json(
        self,
        looppoint_file_path: Path,
        targets: List[PcCountPair],
        json_file: Dict[int, Dict],
        region_id: Dict[PcCountPair, int],
    ) -> None:
        """
        This function profiles the json LoopPoint data file into three
        variables to take correct checkpoints for LoopPoint. All three
        containers are filled in place.

        :param looppoint_file_path: the path of the LoopPoint data file
        :param targets: a list of PcCountPair
        :param json_file: a dictionary for all the LoopPoint data
        :param region_id: a dictionary for all the significant PcCountPair and
        its corresponding region id
        """

        with open(looppoint_file_path) as file:
            # Update the caller's dictionary in place. Rebinding the
            # parameter would leave the caller with an empty dictionary and
            # the loaded data would be lost once this function returns.
            json_file.update(json.load(file))

        for rid, region in json_file.items():
            # iterates all regions
            sim_start = PcCountPair(
                region["simulation"]["start"]["pc"],
                region["simulation"]["start"]["global"],
            )
            # store all PC Count pairs in the file into targets list
            targets.append(sim_start)
            end = PcCountPair(
                region["simulation"]["end"]["pc"],
                region["simulation"]["end"]["global"],
            )
            targets.append(end)
            if "warmup" in region:
                # if there is a warmup in the region, then the checkpoint
                # should be taken at the start of the warmup interval
                start = PcCountPair(
                    region["warmup"]["start"]["pc"],
                    region["warmup"]["start"]["count"],
                )
                targets.append(start)
                end = PcCountPair(
                    region["warmup"]["end"]["pc"],
                    region["warmup"]["end"]["count"],
                )
                targets.append(end)
            else:
                # if there is not a warmup interval in the region, then the
                # checkpoint should be taken at the start of the simulation
                # region
                start = sim_start
            region_id[start] = rid
class LoopPointRestore(LoopPoint):
    def __init__(self, looppoint_file: Path, region_id: int) -> None:
        """
        Loads a LoopPoint json data file and prepares the targets and region
        mapping needed to restore a checkpoint of a single region.

        :param looppoint_file: a json file generated by gem5 that has all the
        LoopPoint data information
        :param region_id: The region ID we will be restoring to.
        """

        with open(looppoint_file) as handle:
            loaded = json.load(handle)

        restore_targets, restore_regions = self.get_region(
            json_file=loaded, region_id=region_id
        )

        super().__init__(
            targets=restore_targets, regions=restore_regions, json_file=loaded
        )

    def get_region(
        self, json_file: Dict[int, Dict], region_id: int
    ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]:
        """
        Builds the target list and the PcCountPair-to-region mapping for one
        region, using the relative counts stored in the LoopPoint data.

        `fatal` is called when the region id is unknown or when a needed
        relative count is missing. The start of the simulation region is
        only used when the region has a warmup interval.

        :param json_file: the loaded LoopPoint data
        :param region_id: the region to restore to
        :return: the list of target PcCountPairs and the dictionary mapping
        each of them to `region_id`
        """
        pair_to_region = {}
        restore_targets = []

        if region_id not in json_file:
            # an unknown region id cannot be restored to
            fatal(f"{region_id} is not a valid region\n")

        region = json_file[region_id]
        boundaries = ["start", "end"] if "warmup" in region else ["end"]
        for boundary_name in boundaries:
            boundary = region["simulation"][boundary_name]
            if "relative" not in boundary:
                # without a relative count there is not enough information
                # to restore this checkpoint
                fatal(f"region {region_id} doesn't have relative count info\n")
            pair = PcCountPair(boundary["pc"], boundary["relative"])
            pair_to_region[pair] = region_id
            restore_targets.append(pair)

        return restore_targets, pair_to_region