diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py index 1d8525fe77..f967aa56e4 100644 --- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py +++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py @@ -61,7 +61,7 @@ from pathlib import Path from gem5.simulate.exit_event_generators import ( looppoint_save_checkpoint_generator, ) -from gem5.utils.looppoint import LoopPointCheckpoint +from gem5.resources.looppoint import LooppointCsvLoader import argparse requires(isa_required=ISA.X86) @@ -103,16 +103,13 @@ processor = SimpleProcessor( num_cores=9, ) -looppoint = LoopPointCheckpoint( +looppoint = LooppointCsvLoader( # Pass in the LoopPoint data file looppoint_file=Path( obtain_resource( "x86-matrix-multiply-omp-100-8-global-pinpoints" ).get_local_path() - ), - # True if the LoopPoint data file is a csv generated by Pin. - # False if it is a JSON file generated by the gem5 simulator. 
- if_csv=True, + ) ) board = SimpleBoard( diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py index 28645259d0..c54fdabca1 100644 --- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py +++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py @@ -55,7 +55,7 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.resources.resource import obtain_resource from pathlib import Path -from gem5.utils.looppoint import LoopPointRestore +from gem5.utils.resource import LooppointJsonLoader from m5.stats import reset, dump requires(isa_required=ISA.X86) @@ -113,7 +113,7 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -looppoint = LoopPointRestore( +looppoint = LooppointJsonLoader( looppoint_file=Path( obtain_resource( "x86-matrix-multiply-omp-100-8-looppoint" diff --git a/src/python/SConscript b/src/python/SConscript index 68b5e1d926..f401c03468 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -240,7 +240,6 @@ PySource('gem5.components.processors', PySource('gem5.components.processors', 'gem5/components/processors/switchable_processor.py') PySource('gem5.utils', 'gem5/utils/simpoint.py') -PySource('gem5.utils', 'gem5/utils/looppoint.py') PySource('gem5.components.processors', 'gem5/components/processors/traffic_generator_core.py') PySource('gem5.components.processors', @@ -263,6 +262,7 @@ PySource('gem5.resources', 'gem5/resources/downloader.py') PySource('gem5.resources', 'gem5/resources/md5_utils.py') PySource('gem5.resources', 'gem5/resources/resource.py') PySource('gem5.resources', 'gem5/resources/workload.py') +PySource('gem5.resources', 'gem5/resources/looppoint.py') PySource('gem5.utils', 'gem5/utils/__init__.py') PySource('gem5.utils', 'gem5/utils/filelock.py') PySource('gem5.utils', 'gem5/utils/override.py') diff --git 
a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index 404a78458f..dc5425754e 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -35,7 +35,7 @@ from ...resources.resource import ( SimpointDirectoryResource, ) -from gem5.utils.looppoint import LoopPoint +from gem5.resources.looppoint import Looppoint from m5.objects import SEWorkload, Process @@ -177,9 +177,10 @@ class SEBinaryWorkload: def set_se_looppoint_workload( self, binary: AbstractResource, + looppoint: Looppoint, arguments: List[str] = [], - looppoint: Optional[Union[AbstractResource, LoopPoint]] = None, checkpoint: Optional[Union[Path, AbstractResource]] = None, + region_id: Optional[Union[int, str]] = None, ) -> None: """Set up the system to run a LoopPoint workload. @@ -188,18 +189,18 @@ class SEBinaryWorkload: ISA and the simulated ISA are the same. :param binary: The resource encapsulating the binary to be run. - :param arguments: The input arguments for the binary :param looppoint: The LoopPoint object that contain all the information gather from the LoopPoint files and a LoopPointManager that will raise exit events for LoopPoints + :param arguments: The input arguments for the binary + :param region_id: If set, will only load the Looppoint region + corresponding to that ID. 
""" - if isinstance(looppoint, AbstractResource): - self._looppoint_object = LoopPoint(looppoint) - else: - assert isinstance(looppoint, LoopPoint) - self._looppoint_object = looppoint - + assert isinstance(looppoint, Looppoint) + self._looppoint_object = looppoint + if region_id: + self._looppoint_object.set_target_region_id(region_id=region_id) self._looppoint_object.setup_processor(self.get_processor()) # Call set_se_binary_workload after LoopPoint setup is complete @@ -209,7 +210,7 @@ class SEBinaryWorkload: checkpoint=checkpoint, ) - def get_looppoint(self) -> LoopPoint: + def get_looppoint(self) -> Looppoint: """ Returns the LoopPoint object set. If no LoopPoint object has been set an exception is thrown. diff --git a/src/python/gem5/resources/looppoint.py b/src/python/gem5/resources/looppoint.py new file mode 100644 index 0000000000..684faef37d --- /dev/null +++ b/src/python/gem5/resources/looppoint.py @@ -0,0 +1,544 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
# NOTE: m5 types are written as string annotations so that these plain data
# holders do not evaluate m5 names at class-creation time.


class LooppointRegionPC:
    """A data structure for storing the Looppoint region's PC information.

    **Note**: This is not intended to be a user-facing class. The classes
    `LooppointJsonLoader` and `LooppointCsvLoader` can be used to load
    and restore Looppoint data.
    """

    def __init__(self, pc: int, globl: int, relative: Optional[int] = None):
        """
        :param pc: The Program Counter value of this region.
        :param globl: The global value of this region.
        :param relative: The relative program counter value. Optional.
        """
        self._pc = pc
        self._global = globl
        self._relative = relative

    def get_pc(self) -> int:
        """Returns the Program Counter value."""
        return self._pc

    def get_global(self) -> int:
        """Returns the global value."""
        return self._global

    def get_relative(self) -> Optional[int]:
        """If specified, returns the relative Program Counter value, otherwise
        returns None."""
        return self._relative

    def get_pc_count_pair(self) -> "PcCountPair":
        """Returns the PcCountPair built from this region's PC and global
        values."""
        return PcCountPair(self.get_pc(), self.get_global())

    def update_relative_count(self, manager: "PcCountTrackerManager") -> None:
        """Updates the relative count.

        The relative count is set to this region's global value minus the
        count the manager currently reports for this region's PC.

        :param manager: The PcCountTrackerManager queried (via `getPcCount`)
        for the current count of this PC.
        """
        self._relative = int(
            self.get_global() - manager.getPcCount(self.get_pc())
        )

    def to_json(self) -> Dict[str, int]:
        """Returns this class in a JSON structure which can then be serialized
        and later be restored from.

        The "relative" key is only present when a relative count has been set.
        """
        to_return = {
            "pc": self.get_pc(),
            "global": self.get_global(),
        }
        # Compare against None instead of relying on truthiness: a relative
        # count of 0 is a legitimate value and must survive serialization.
        if self._relative is not None:
            to_return["relative"] = self.get_relative()

        return to_return


class LooppointRegionWarmup:
    """A data structure for storing a Looppoint region's warmup data.

    **Note**: This is not intended to be a user-facing class. The classes
    `LooppointJsonLoader` and `LooppointCsvLoader` can be used to load
    and restore Looppoint data.
    """

    def __init__(self, start: "PcCountPair", end: "PcCountPair"):
        """
        :param start: The starting PcCountPair.
        :param end: The ending PcCountPair.
        """
        self._start = start
        self._end = end

    def get_start(self) -> "PcCountPair":
        """Returns the PcCountPair for the start of the region warmup."""
        return self._start

    def get_end(self) -> "PcCountPair":
        """Returns the PcCountPair for the end of the region warmup."""
        return self._end

    def get_pc_count_pairs(self) -> List["PcCountPair"]:
        """Returns the start and end PC count pairs, in that order."""
        return [self.get_start(), self.get_end()]

    def to_json(self) -> Dict[str, Dict[str, int]]:
        """Returns this class in a JSON structure which can then be
        serialized. Each pair is stored as its `pc` and `count` fields."""
        return {
            "start": {
                "pc": self.get_start().pc,
                "count": self.get_start().count,
            },
            "end": {
                "pc": self.get_end().pc,
                "count": self.get_end().count,
            },
        }


class LooppointSimulation:
    """A data structure to store the start and end of a simulation region.

    **Note**: This is not intended to be a user-facing class. The classes
    `LooppointJsonLoader` and `LooppointCsvLoader` can be used to load
    and restore Looppoint data.
    """

    def __init__(self, start: LooppointRegionPC, end: LooppointRegionPC):
        """
        :param start: The starting LooppointRegionPC.
        :param end: The ending LooppointRegionPC.
        """
        self._start = start
        self._end = end

    def get_start(self) -> LooppointRegionPC:
        """Returns the starting LooppointRegionPC data structure."""
        return self._start

    def get_end(self) -> LooppointRegionPC:
        """Returns the ending LooppointRegionPC data structure."""
        return self._end

    def get_pc_count_pairs(self) -> List["PcCountPair"]:
        """Returns the PC count pairs for the start and end
        LooppointRegionPCs, in that order."""
        return [
            self.get_start().get_pc_count_pair(),
            self.get_end().get_pc_count_pair(),
        ]

    def update_relatives_counts(
        self, manager: "PcCountTrackerManager", include_start: bool = False
    ) -> None:
        """Updates the relative counts for this simulation region.

        :param manager: The PcCountTrackerManager the counts are read from.
        :param include_start: If True, the relative count of the start of the
        region is updated as well. The end is always updated.
        """
        if include_start:
            # if this region has a warmup interval,
            # then update the relative count for the
            # start of the simulation region
            self.get_start().update_relative_count(manager=manager)

        self.get_end().update_relative_count(manager=manager)

    def to_json(self) -> Dict:
        """Returns this class in a JSON structure which can then be serialized
        and later be restored from."""
        return {
            "start": self.get_start().to_json(),
            "end": self.get_end().to_json(),
        }
class LooppointRegion:
    """A data structure to store Looppoint region information.

    **Note**: This is not intended to be a user-facing class. The classes
    `LooppointJsonLoader` and `LooppointCsvLoader` can be used to load
    and restore Looppoint data.
    """

    def __init__(
        self,
        simulation: LooppointSimulation,
        multiplier: float,
        warmup: Optional[LooppointRegionWarmup] = None,
    ):
        """
        :param simulation: The simulation information for this Looppoint
        region.
        :param multiplier: The multiplier for this Looppoint region.
        :param warmup: The warmup information for this Looppoint region.
        Optional.
        """
        self._simulation = simulation
        self._multiplier = multiplier
        self._warmup = warmup

    def get_simulation(self) -> LooppointSimulation:
        """Returns the simulation region information."""
        return self._simulation

    def get_multiplier(self) -> float:
        """Returns the multiplier."""
        return self._multiplier

    def get_warmup(self) -> Optional[LooppointRegionWarmup]:
        """If set, returns the warmup region information. Otherwise None."""
        return self._warmup

    def get_pc_count_pairs(self) -> List[PcCountPair]:
        """Returns the PC count pairs for this Looppoint region: the
        simulation start/end pairs followed, if a warmup is set, by the
        warmup start/end pairs."""
        pc_count_pairs = self.get_simulation().get_pc_count_pairs()
        warmup = self.get_warmup()
        if warmup is not None:
            pc_count_pairs.extend(warmup.get_pc_count_pairs())
        return pc_count_pairs

    def update_relatives_counts(self, manager: PcCountTrackerManager) -> None:
        """Updates the relative counts of this Looppoint region.

        The start of the simulation region is only updated when this region
        has a warmup interval; the end is always updated.
        """
        self.get_simulation().update_relatives_counts(
            manager=manager, include_start=self.get_warmup() is not None
        )

    def get_start(self) -> PcCountPair:
        """Returns the correct starting PcCountPair for this Looppoint
        region: the warmup start if a warmup is set, otherwise the start of
        the simulation region."""
        warmup = self.get_warmup()
        if warmup is not None:
            return warmup.get_start()
        return self.get_simulation().get_start().get_pc_count_pair()

    def to_json(self) -> Dict:
        """Returns this class in a JSON structure which can then be serialized
        and later be restored from. The "warmup" key is only present when a
        warmup is set."""
        to_return = {
            "simulation": self.get_simulation().to_json(),
            "multiplier": self.get_multiplier(),
        }
        warmup = self.get_warmup()
        if warmup is not None:
            to_return["warmup"] = warmup.to_json()
        return to_return


class Looppoint:
    """Stores all the Looppoint information for a gem5 workload."""

    def __init__(self, regions: Dict[Union[str, int], LooppointRegion]):
        """
        :param regions: A dictionary mapping the region_ids with the
        LooppointRegions.
        """
        self._regions = regions
        self._manager = PcCountTrackerManager()
        self._manager.targets = self.get_targets()

    def set_target_region_id(self, region_id: Union[str, int]) -> None:
        """There are use-cases where we want to obtain a looppoint data
        structure containing a single target region via its ID. This function
        will remove all irrelevant regions and refresh the manager's targets.

        :param region_id: The ID of the only region to keep.
        :raises Exception: If `region_id` is not a known region ID.
        """

        if region_id not in self._regions:
            raise Exception(f"Region ID '{region_id}' cannot be found.")

        # Region IDs must be compared by value. An identity test (`is not`)
        # treats an equal-but-distinct int/str object as a different ID, which
        # would delete every region, including the requested one.
        to_remove = [rid for rid in self._regions if rid != region_id]
        for rid in to_remove:
            del self._regions[rid]

        self._manager.targets = self.get_targets()

    def get_manager(self) -> PcCountTrackerManager:
        """Returns the PcCountTrackerManager for this Looppoint data
        structure."""
        return self._manager

    def get_regions(self) -> Dict[Union[int, str], LooppointRegion]:
        """Returns the regions for this Looppoint data structure."""
        return self._regions

    def setup_processor(
        self,
        processor: "AbstractProcessor",
    ) -> None:
        """
        Sets up a PC tracker in all the cores and connects all the trackers
        to the PC tracker manager to perform multithread PC tracking.

        :param processor: The processor used in the simulation configuration.
        """
        for core in processor.get_cores():
            core.add_pc_tracker_probe(self.get_targets(), self.get_manager())

    def update_relatives_counts(self) -> None:
        """
        Updates the relative count for restore usage. The new relative count
        will be stored in relevant data structures.

        Nothing is updated unless the current PC Count pair is the start of
        one of the regions.
        """
        current_pair = self.get_current_pair()
        region_start_map = self.get_region_start_id_map()
        if current_pair in region_start_map:
            region_id = region_start_map[current_pair]
            self.get_regions()[region_id].update_relatives_counts(
                manager=self.get_manager()
            )

    def get_current_region(self) -> Optional[Union[str, int]]:
        """Returns the region id if the current PC Count pair is significant
        (e.g. beginning of the checkpoint), otherwise, it returns None to
        indicate the current PC Count pair is not significant.

        Callers should test the result with `is not None`; a region id may
        itself be falsy (for example `0`).
        """
        current_pair = self.get_current_pair()
        region_start_map = self.get_region_start_id_map()
        if current_pair in region_start_map:
            return region_start_map[current_pair]
        return None

    def get_current_pair(self) -> PcCountPair:
        """This function returns the current PC Count pair."""
        return self.get_manager().getCurrentPcCountPair()

    def get_region_start_id_map(self) -> Dict[PcCountPair, Union[int, str]]:
        """Returns the starting PcCountPairs mapped to the corresponding region
        IDs. This is a helper function for quick mapping of PcCountPairs to
        region IDs."""

        regions = {}
        for rid in self.get_regions():
            regions[self.get_regions()[rid].get_start()] = rid

        return regions

    def get_targets(self) -> List[PcCountPair]:
        """Returns the complete list of target PcCountPairs. That is, the
        PcCountPairs each region starts and ends with as well as the relevant
        warmup intervals."""
        targets = []
        for rid in self.get_regions():
            targets.extend(self.get_regions()[rid].get_pc_count_pairs())

        return targets

    def to_json(self) -> Dict[Union[int, str], Dict]:
        """Returns this data-structure as a dictionary for serialization via
        the `output_json_file` function."""
        to_return = {}
        for region_id in self.get_regions():
            to_return[region_id] = self.get_regions()[region_id].to_json()
        return to_return

    def output_json_file(
        self,
        input_indent: int = 4,
        filepath: str = os.path.join(m5.options.outdir, "looppoint.json"),
    ) -> None:
        """
        Writes the result of `to_json` to a json file.

        :param input_indent: the indent value of the json file
        :param filepath: the path of the output json file. Note the default
        is computed once, when this function is defined, from the value of
        `m5.options.outdir` at that time.
        """
        with open(filepath, "w") as file:
            json.dump(self.to_json(), file, indent=input_indent)


class LooppointCsvLoader(Looppoint):
    """This class will create a Looppoint data structure from data extracted
    from a Looppoint pinpoints file."""

    def __init__(
        self,
        pinpoints_file: Union[Path, str],
        region_id: Optional[Union[str, int]] = None,
    ):
        """
        :param pinpoints_file: The pinpoints file in which the data is to be
        expected.
        :param region_id: If set, will only load the specified region data.
        Otherwise, all region info is loaded. Is used when restoring to a
        particular region. Region IDs parsed from the file are `int`s.
        :raises Exception: If a warmup entry has no corresponding region, or
        if `region_id` is set but not present in the file.
        """

        regions = {}
        warmups = {}

        _path = Path(pinpoints_file)

        # This section is hard-coded to parse the data in the csv file.
        # The csv file is assumed to have a constant format.
        with open(_path, newline="") as csvfile:
            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
            for row in reader:
                if len(row) <= 1:
                    continue

                if row[0] == "cluster":
                    # if it is a simulation region
                    line = row[4].split(",")

                    rid = int(line[2])

                    region_start = LooppointRegionPC(
                        pc=int(line[3], 16),
                        globl=int(line[6]),
                        # From the CSVs observed so far, the start relative
                        # value is never set, while the end is always set.
                        # Given limited information, this is assumed to be a
                        # rule of how the CSV is set up.
                        relative=None,
                    )

                    region_end = LooppointRegionPC(
                        pc=int(line[7], 16),
                        globl=int(line[10]),
                        relative=int(line[11]),
                    )

                    simulation = LooppointSimulation(
                        start=region_start, end=region_end
                    )

                    multiplier = float(line[14])

                    regions[rid] = LooppointRegion(
                        simulation=simulation, multiplier=multiplier
                    )

                elif row[0] == "Warmup":
                    line = row[3].split(",")
                    rid = int(line[0])
                    start = PcCountPair(int(line[3], 16), int(line[6]))
                    end = PcCountPair(int(line[7], 16), int(line[10]))

                    warmups[rid] = LooppointRegionWarmup(start=start, end=end)

        # Warmups are attached after the whole file is read, so a warmup row
        # may appear before or after the row of the region it belongs to.
        for rid in warmups:
            if rid not in regions:
                raise Exception(
                    f"Warmup region ID '{rid}' does not have a "
                    "corresponding region."
                )
            regions[rid]._warmup = warmups[rid]

        super().__init__(regions=regions)

        # `is not None` (rather than truthiness) so that a region id of 0 is
        # honoured instead of being silently ignored.
        if region_id is not None:
            self.set_target_region_id(region_id=region_id)


class LooppointJsonLoader(Looppoint):
    """This class will generate a Looppoint data structure from data
    extracted from a Looppoint json file."""

    def __init__(
        self,
        looppoint_file: Union[str, Path],
        region_id: Optional[Union[str, int]] = None,
    ) -> None:
        """
        :param looppoint_file: a json file generated by gem5 that has all the
        LoopPoint data information
        :param region_id: If set, will only load the specified region data.
        Otherwise, all region info is loaded. Is used when restoring to a
        particular region. Region IDs are used exactly as they appear as keys
        in the json file; JSON object keys are decoded as `str`.
        :raises Exception: If `region_id` is set but not present in the file.
        """

        _path = Path(looppoint_file)

        with open(_path) as file:
            json_contents = json.load(file)

        regions = {}
        for rid, region_json in json_contents.items():
            simulation_json = region_json["simulation"]
            simulation = LooppointSimulation(
                start=self._region_pc_from_json(simulation_json["start"]),
                end=self._region_pc_from_json(simulation_json["end"]),
            )
            multiplier = float(region_json["multiplier"])

            warmup = None
            if "warmup" in region_json:
                warmup_json = region_json["warmup"]
                warmup = LooppointRegionWarmup(
                    start=PcCountPair(
                        warmup_json["start"]["pc"],
                        warmup_json["start"]["count"],
                    ),
                    end=PcCountPair(
                        warmup_json["end"]["pc"],
                        warmup_json["end"]["count"],
                    ),
                )

            regions[rid] = LooppointRegion(
                simulation=simulation, multiplier=multiplier, warmup=warmup
            )

        super().__init__(regions=regions)

        # `is not None` (rather than truthiness) so that a falsy region id is
        # looked up instead of being silently ignored.
        if region_id is not None:
            self.set_target_region_id(region_id=region_id)

    @staticmethod
    def _region_pc_from_json(data: Dict) -> LooppointRegionPC:
        """Builds a LooppointRegionPC from its json form; "relative" is
        optional and becomes None when absent."""
        return LooppointRegionPC(
            pc=int(data["pc"]),
            globl=int(data["global"]),
            relative=int(data["relative"]) if "relative" in data else None,
        )
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -from m5.util import fatal -from m5.params import PcCountPair -from pathlib import Path -from typing import List, Dict, Tuple -from gem5.components.processors.abstract_processor import AbstractProcessor -from m5.objects import PcCountTrackerManager -import csv -import re -import json -import m5 -import os - - -class LoopPoint: - """ - This LoopPoint class is used to manage the information needed for LoopPoint - in workload - """ - - def __init__( - self, - targets: List[PcCountPair], - regions: Dict[PcCountPair, int], - json_file: Dict[int, Dict], - ) -> None: - """ - :param targets: a list of PcCountPair that are used to generate exit - event at when the PcCountTrackerManager encounter this PcCountPair in - execution - :param regions: a dictionary used to find the corresponding region id - for the significant PcCountPair. This is mainly used to ensure - checkpoints are taken in the correct PcCountPair or relative counts are - updated at the correct count - :param json_file: all the LoopPoint data including relative counts and - multiplier are stored in this parameter. It can be outputted as a json - file. - """ - - self._manager = PcCountTrackerManager() - self._manager.targets = targets - self._targets = targets - self._regions = regions - self._json_file = json_file - - def setup_processor( - self, - processor: AbstractProcessor, - ) -> None: - """ - This function is used to setup a PC tracker in all the cores and - connect all the tracker to the PC tracker manager to perform - multithread PC tracking - :param processor: the processor used in the simulation configuration - """ - for core in processor.get_cores(): - core.add_pc_tracker_probe(self._targets, self._manager) - - def update_relatives_counts(self) -> None: - """ - This function is used to update the relative count for restore used. - The new relative count will be stored in the _json_file and can be - outputted into a json file by calling the output_json_file function. 
- """ - current_pair = self._manager.getCurrentPcCountPair() - if current_pair in self._regions: - rid = self._regions[current_pair] - region = self._json_file[rid]["simulation"] - if "warmup" in self._json_file[rid]: - # if this region has a warmup interval, - # then update the relative count for the - # start of the simulation region - start = region["start"]["pc"] - temp = region["start"]["global"] - self._manager.getPcCount( - start - ) - self._json_file[rid]["simulation"]["start"]["relative"] = int( - temp - ) - end = region["end"]["pc"] - temp = region["end"]["global"] - self._manager.getPcCount(end) - self._json_file[rid]["simulation"]["end"]["relative"] = int(temp) - - def output_json_file( - self, - input_indent: int = 4, - filepath: str = os.path.join(m5.options.outdir, "outdir.json"), - ) -> Dict[int, Dict]: - """ - This function is used to output the _json_file into a json file - :param input_indent: the indent value of the json file - :param filepath: the path of the output file - """ - with open(filepath, "w") as file: - json.dump(self._json_file, file, indent=input_indent) - - def get_current_region(self) -> int: - """ - This function returns the region id if the current PC Count pair is - significant(e.x. 
beginning of the checkpoint), otherwise, it returns - a '-1' to indicate the current PC Count pair is not significant - """ - current_pair = self._manager.getCurrentPcCountPair() - if current_pair in self._regions: - return self._regions[current_pair] - return -1 - - def get_current_pair(self) -> PcCountPair: - """ - This function returns the current PC Count pair - """ - return self._manager.getCurrentPcCountPair() - - def get_regions(self) -> Dict[PcCountPair, int]: - """ - This function returns the complete dictionary of _regions - """ - return self._regions - - def get_targets(self) -> List[PcCountPair]: - """ - This function returns the complete list of _targets - """ - return self._targets - - -class LoopPointCheckpoint(LoopPoint): - def __init__(self, looppoint_file: Path, if_csv: bool) -> None: - """ - This class is specifically designed to take in the LoopPoint data file - and generate the information needed to take checkpoints for LoopPoint - regions(warmup region+simulation region) - :param looppoint_file: the director of the LoopPoint data file - :param if_csv: if the file is a csv file, then it is True. 
If the file - is a json file, then it is False - """ - - _json_file = {} - _targets = [] - _region_id = {} - - if if_csv: - self.profile_csv(looppoint_file, _targets, _json_file, _region_id) - else: - self.profile_json(looppoint_file, _targets, _json_file, _region_id) - - super().__init__( - _targets, - _region_id, - _json_file, - ) - - def profile_csv( - self, - looppoint_file_path: Path, - targets: List[PcCountPair], - json_file: Dict[int, Dict], - region_id: Dict[PcCountPair, int], - ) -> None: - """ - This function profiles the csv LoopPoint data file into three variables - to take correct checkpoints for LoopPoint - :param looppoint_file_path: the director of the LoopPoint data file - :param targets: a list of PcCountPair - :param json_file: a dictionary for all the LoopPoint data - :param region_id: a dictionary for all the significant PcCountPair and - its corresponding region id - """ - - # This section is hard-coded to parse the data in the csv file. - # The csv file is assumed to have a constant format. 
- with open(looppoint_file_path, newline="") as csvfile: - reader = csv.reader(csvfile, delimiter=" ", quotechar="|") - for row in reader: - if len(row) > 1: - if row[0] == "cluster": - # if it is a simulation region - line = row[4].split(",") - start = PcCountPair(int(line[3], 16), int(line[6])) - end = PcCountPair(int(line[7], 16), int(line[10])) - if int(line[2]) in json_file: - # if this region was created in the json_file - json_file[int(line[2])]["simulation"] = { - "start": {"pc": int(line[3], 16)} - } - else: - json_file[int(line[2])] = { - "simulation": { - "start": {"pc": int(line[3], 16)} - } - } - json_file[int(line[2])]["simulation"]["start"][ - "global" - ] = int(line[6]) - json_file[int(line[2])]["simulation"]["end"] = { - "pc": int(line[7], 16) - } - json_file[int(line[2])]["simulation"]["end"][ - "global" - ] = int(line[10]) - json_file[int(line[2])]["multiplier"] = float(line[14]) - targets.append(start) - targets.append(end) - # store all the PC Count pairs from the file to the - # targets list - elif row[0] == "Warmup": - line = row[3].split(",") - start = PcCountPair(int(line[3], 16), int(line[6])) - end = PcCountPair(int(line[7], 16), int(line[10])) - if int(line[0]) in json_file: - json_file[int(line[0])]["warmup"] = { - "start": {"pc": int(line[3], 16)} - } - else: - json_file[int(line[0])] = { - "warmup": {"start": {"pc": int(line[3], 16)}} - } - json_file[int(line[0])]["warmup"]["start"][ - "count" - ] = int(line[6]) - json_file[int(line[0])]["warmup"]["end"] = { - "pc": int(line[7], 16) - } - json_file[int(line[0])]["warmup"]["end"][ - "count" - ] = int(line[10]) - targets.append(start) - targets.append(end) - # store all the PC Count pairs from the file to the - # targets list - - for rid, region in json_file.items(): - # this loop iterates all the regions and find the significant PC - # Count pair for the region - if "warmup" in region: - # if the region has a warmup interval, then the checkpoint - # should be taken at the start of the 
warmup interval - start = PcCountPair( - region["warmup"]["start"]["pc"], - region["warmup"]["start"]["count"], - ) - else: - # if the region does not have a warmup interval, then the - # checkpoint should be taken at the start of the simulation - # region - start = PcCountPair( - region["simulation"]["start"]["pc"], - region["simulation"]["start"]["global"], - ) - region_id[start] = rid - - def profile_json( - self, - looppoint_file_path: Path, - targets: List[PcCountPair], - json_file: Dict[int, Dict], - region_id: Dict[PcCountPair, int], - ) -> None: - """ - This function profiles the json LoopPoint data file into three - variables to take correct checkpoints for LoopPoint - :param looppoint_file_path: the director of the LoopPoint data file - :param targets: a list of PcCountPair - :param json_file: a dictionary for all the LoopPoint data - :param region_id: a dictionary for all the significant PcCountPair and - its corresponding region id - """ - - with open(looppoint_file_path) as file: - json_file = json.load(file) - # load all json information into the json_file variable - for rid, region in json_file.items(): - # iterates all regions - sim_start = PcCountPair( - region["simulation"]["start"]["pc"], - region["simulation"]["start"]["global"], - ) - targets.append(sim_start) - # store all PC Count pairs in the file into targets list - end = PcCountPair( - region["simulation"]["end"]["pc"], - region["simulation"]["end"]["global"], - ) - targets.append(end) - if "warmup" in region: - # if there is a warmup in the region, then the checkpoint - # should be taken at the start of the warmup interval - start = PcCountPair( - region["warmup"]["start"]["pc"], - region["warmup"]["start"]["count"], - ) - targets.append(start) - end = PcCountPair( - region["warmup"]["end"]["pc"], - region["warmup"]["end"]["count"], - ) - targets.append(end) - else: - # if there is not a warmup interval in the region, then the - # checkpoint should be taken at the start of the simulation 
class LoopPointRestore(LoopPoint):
    """
    Reads a gem5-generated LoopPoint JSON file and extracts the PC Count
    pairs needed to restore the checkpoint of one region.
    """

    def __init__(self, looppoint_file: Path, region_id: int) -> None:
        """
        This class is specifically designed to take in the LoopPoint data file
        and generate the information needed to restore a checkpoint taken by
        the LoopPointCheckPoint.
        :param looppoint_file: a json file generated by gem5 that has all the
        LoopPoint data information
        :param region_id: The region ID we will be restoring to.
        """

        with open(looppoint_file) as file:
            json_file = json.load(file)

        targets, regions = self.get_region(
            json_file=json_file, region_id=region_id
        )

        super().__init__(targets=targets, regions=regions, json_file=json_file)

    def get_region(
        self, json_file: Dict[int, Dict], region_id: int
    ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]:
        """
        Collects the PC Count pairs that have to be tracked while running the
        given region from its checkpoint.
        :param json_file: the LoopPoint data, keyed by region id
        :param region_id: the id of the region to restore; it is looked up as
        given first and then as a string, because json.load() always returns
        string keys
        :return: a list of the PC Count pairs to track and a dictionary
        mapping each of those pairs to `region_id`
        fatal() is called if the region does not exist or if a needed
        "relative" count is missing.
        """
        # JSON object keys are always decoded as str, so an integer region id
        # never matches a freshly loaded file directly. Try the id as given
        # (covers str ids and dicts built with int keys), then its str form.
        key = region_id
        if key not in json_file:
            key = str(region_id)
        if key not in json_file:
            # if the region id does not exist in the LoopPoint data
            # file raise a fatal message
            fatal(f"{region_id} is not a valid region\n")
        region = json_file[key]
        simulation = region["simulation"]

        # The end of the simulation region is always tracked. The start is
        # only tracked when the region has a warmup interval.
        # NOTE(review): the nesting was reconstructed from a source that lost
        # its indentation -- confirm the end marker belongs outside the
        # warmup branch.
        edges = ["start", "end"] if "warmup" in region else ["end"]

        to_return_targets = []
        to_return_region = {}
        for edge in edges:
            marker = simulation[edge]
            if "relative" not in marker:
                # if there are not relative counts for the PC Count
                # pair then it means there is not enough information to
                # restore this checkpoint
                fatal(f"region {region_id} doesn't have relative count info\n")
            pair = PcCountPair(marker["pc"], marker["relative"])
            to_return_region[pair] = region_id
            to_return_targets.append(pair)

        return to_return_targets, to_return_region