From aae34302814df270748d542b7fb795f03dd66fad Mon Sep 17 00:00:00 2001 From: "Bobby R. Bruce" Date: Sun, 5 Feb 2023 20:20:30 +0000 Subject: [PATCH] stdlib: Refactor Looppoint This change refactors the Looppoint files. While functionally equivalent, these classes have been moved and altered to be easier to handle going forward. The following changes have been made: - New classes have been added to represent the data structure of the Looppoint JSON. This simplifies the parsing of JSON files and makes it easier to handle Looppoint data structures. Ultimately this is hidden from the user via the new 'gem5.resources.looppoint.Looppoint' class which will be the front-facing class for Looppoint interactions. - The `LoopPointCheckpoint` class has been replaced with `LooppointCsvLoader`. This new class takes in a CSV pinpoints file to load necessary looppoint data. - The `LoopPointRestore` class has been replaced by `LooppointJsonLoader`. - All Looppoint classes have been moved to `gem5.resources`. This will make it easier when we add Looppoints as specific gem5 resources. 
Change-Id: I11dd1fe8f76658db220320584270d57cb37a3c62 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/67611 Tested-by: kokoro Reviewed-by: Bobby Bruce Maintainer: Bobby Bruce --- .../create-looppoint-checkpoints.py | 9 +- .../restore-looppoint-checkpoint.py | 4 +- src/python/SConscript | 2 +- .../components/boards/se_binary_workload.py | 21 +- src/python/gem5/resources/looppoint.py | 544 ++++++++++++++++++ .../gem5/simulate/exit_event_generators.py | 6 +- src/python/gem5/utils/looppoint.py | 384 ------------- 7 files changed, 564 insertions(+), 406 deletions(-) create mode 100644 src/python/gem5/resources/looppoint.py delete mode 100644 src/python/gem5/utils/looppoint.py diff --git a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py index 1d8525fe77..f967aa56e4 100644 --- a/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py +++ b/configs/example/gem5_library/looppoints/create-looppoint-checkpoints.py @@ -61,7 +61,7 @@ from pathlib import Path from gem5.simulate.exit_event_generators import ( looppoint_save_checkpoint_generator, ) -from gem5.utils.looppoint import LoopPointCheckpoint +from gem5.resources.looppoint import LooppointCsvLoader import argparse requires(isa_required=ISA.X86) @@ -103,16 +103,13 @@ processor = SimpleProcessor( num_cores=9, ) -looppoint = LoopPointCheckpoint( +looppoint = LooppointCsvLoader( # Pass in the LoopPoint data file looppoint_file=Path( obtain_resource( "x86-matrix-multiply-omp-100-8-global-pinpoints" ).get_local_path() - ), - # True if the LoopPoint data file is a csv generated by Pin. - # False if it is a JSON file generated by the gem5 simulator. 
- if_csv=True, + ) ) board = SimpleBoard( diff --git a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py index 28645259d0..c54fdabca1 100644 --- a/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py +++ b/configs/example/gem5_library/looppoints/restore-looppoint-checkpoint.py @@ -55,7 +55,7 @@ from gem5.components.processors.cpu_types import CPUTypes from gem5.isas import ISA from gem5.resources.resource import obtain_resource from pathlib import Path -from gem5.utils.looppoint import LoopPointRestore +from gem5.utils.resource import LooppointJsonLoader from m5.stats import reset, dump requires(isa_required=ISA.X86) @@ -113,7 +113,7 @@ board = SimpleBoard( cache_hierarchy=cache_hierarchy, ) -looppoint = LoopPointRestore( +looppoint = LooppointJsonLoader( looppoint_file=Path( obtain_resource( "x86-matrix-multiply-omp-100-8-looppoint" diff --git a/src/python/SConscript b/src/python/SConscript index 68b5e1d926..f401c03468 100644 --- a/src/python/SConscript +++ b/src/python/SConscript @@ -240,7 +240,6 @@ PySource('gem5.components.processors', PySource('gem5.components.processors', 'gem5/components/processors/switchable_processor.py') PySource('gem5.utils', 'gem5/utils/simpoint.py') -PySource('gem5.utils', 'gem5/utils/looppoint.py') PySource('gem5.components.processors', 'gem5/components/processors/traffic_generator_core.py') PySource('gem5.components.processors', @@ -263,6 +262,7 @@ PySource('gem5.resources', 'gem5/resources/downloader.py') PySource('gem5.resources', 'gem5/resources/md5_utils.py') PySource('gem5.resources', 'gem5/resources/resource.py') PySource('gem5.resources', 'gem5/resources/workload.py') +PySource('gem5.resources', 'gem5/resources/looppoint.py') PySource('gem5.utils', 'gem5/utils/__init__.py') PySource('gem5.utils', 'gem5/utils/filelock.py') PySource('gem5.utils', 'gem5/utils/override.py') diff --git 
a/src/python/gem5/components/boards/se_binary_workload.py b/src/python/gem5/components/boards/se_binary_workload.py index 404a78458f..dc5425754e 100644 --- a/src/python/gem5/components/boards/se_binary_workload.py +++ b/src/python/gem5/components/boards/se_binary_workload.py @@ -35,7 +35,7 @@ from ...resources.resource import ( SimpointDirectoryResource, ) -from gem5.utils.looppoint import LoopPoint +from gem5.resources.looppoint import Looppoint from m5.objects import SEWorkload, Process @@ -177,9 +177,10 @@ class SEBinaryWorkload: def set_se_looppoint_workload( self, binary: AbstractResource, + looppoint: Looppoint, arguments: List[str] = [], - looppoint: Optional[Union[AbstractResource, LoopPoint]] = None, checkpoint: Optional[Union[Path, AbstractResource]] = None, + region_id: Optional[Union[int, str]] = None, ) -> None: """Set up the system to run a LoopPoint workload. @@ -188,18 +189,18 @@ class SEBinaryWorkload: ISA and the simulated ISA are the same. :param binary: The resource encapsulating the binary to be run. - :param arguments: The input arguments for the binary :param looppoint: The LoopPoint object that contain all the information gather from the LoopPoint files and a LoopPointManager that will raise exit events for LoopPoints + :param arguments: The input arguments for the binary + :param region_id: If set, will only load the Looppoint region + corresponding to that ID. 
""" - if isinstance(looppoint, AbstractResource): - self._looppoint_object = LoopPoint(looppoint) - else: - assert isinstance(looppoint, LoopPoint) - self._looppoint_object = looppoint - + assert isinstance(looppoint, Looppoint) + self._looppoint_object = looppoint + if region_id: + self._looppoint_object.set_target_region_id(region_id=region_id) self._looppoint_object.setup_processor(self.get_processor()) # Call set_se_binary_workload after LoopPoint setup is complete @@ -209,7 +210,7 @@ class SEBinaryWorkload: checkpoint=checkpoint, ) - def get_looppoint(self) -> LoopPoint: + def get_looppoint(self) -> Looppoint: """ Returns the LoopPoint object set. If no LoopPoint object has been set an exception is thrown. diff --git a/src/python/gem5/resources/looppoint.py b/src/python/gem5/resources/looppoint.py new file mode 100644 index 0000000000..684faef37d --- /dev/null +++ b/src/python/gem5/resources/looppoint.py @@ -0,0 +1,544 @@ +# Copyright (c) 2023 The Regents of the University of California +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from m5.params import PcCountPair +from m5.objects import PcCountTrackerManager +import m5 + +import os +import csv +import json +from pathlib import Path +from typing import List, Optional, Dict, Union + + +class LooppointRegionPC: + """A data structure for storing the Looppoint region's PC information. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__(self, pc: int, globl: int, relative: Optional[int] = None): + """ + :param pc: The Program Counter value of this region. + :param globl: The global value of this region. + :param relative: The relative program counter value. Optional. 
+ """ + self._pc = pc + self._global = globl + self._relative = relative + + def get_pc(self) -> int: + """Returns the Program counter value.""" + return self._pc + + def get_global(self) -> int: + """Returns the global value.""" + return self._global + + def get_relative(self) -> Optional[int]: + """If specified, returns the relative Program counter value, otherwise + returns None.""" + return self._relative + + def get_pc_count_pair(self) -> PcCountPair: + """Returns the PcCountPair for this Region PC value.""" + return PcCountPair(self.get_pc(), self.get_global()) + + def update_relative_count(self, manager: PcCountTrackerManager) -> None: + """Updates the relative count.""" + self._relative = int( + self.get_global() - manager.getPcCount(self.get_pc()) + ) + + def to_json(self) -> Dict[str, int]: + """Returns this class in a JSON structure which can then be serialized + and later be restored from.""" + to_return = { + "pc": self.get_pc(), + "global": self.get_global(), + } + if self._relative: + to_return["relative"] = self.get_relative() + + return to_return + + +class LooppointRegionWarmup: + """A data structure for storing a Looppoint region's warmup data. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__(self, start: PcCountPair, end: PcCountPair): + """ + :param start: The starting PcCountPair. + :param end: The ending PcCountPair. 
+ """ + self._start = start + self._end = end + + def get_start(self) -> PcCountPair: + """Returns the PcCountPair for the start of the region warmup.""" + return self._start + + def get_end(self) -> PcCountPair: + """Returns the PcCountPair for the end of the region warmup.""" + return self._end + + def get_pc_count_pairs(self) -> List[PcCountPair]: + """Returns the start and end PC count pairs.""" + return [self.get_start(), self.get_end()] + + def to_json(self) -> Dict[str, Dict[str, int]]: + """Returns this class in a JSON structure which can then be + serialized.""" + return { + "start": { + "pc": self.get_start().pc, + "count": self.get_start().count, + }, + "end": { + "pc": self.get_end().pc, + "count": self.get_end().count, + }, + } + + +class LooppointSimulation: + """A data structure to store the simulation region start and end region. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__(self, start: LooppointRegionPC, end: LooppointRegionPC): + """ + :param start: The starting LooppointRegionPC. + :param end: The ending LoopppointRegionPC. 
+ """ + self._start = start + self._end = end + + def get_start(self) -> LooppointRegionPC: + """Returns the starting LooppointRegionPC data structure.""" + return self._start + + def get_end(self) -> LooppointRegionPC: + """Returns the ending LooppointRegionPC data structure.""" + return self._end + + def get_pc_count_pairs(self) -> List[PcCountPair]: + """Returns the PC count pairs for the start and end + LoopointRegionPCs.""" + return [ + self.get_start().get_pc_count_pair(), + self.get_end().get_pc_count_pair(), + ] + + def update_relatives_counts( + self, manager: PcCountTrackerManager, include_start: bool = False + ) -> None: + """Updates the relative counts for this simulation region.""" + if include_start: + # if this region has a warmup interval, + # then update the relative count for the + # start of the simulation region + self.get_start().update_relative_count(manager=manager) + + self.get_end().update_relative_count(manager=manager) + + def to_json(self) -> Dict: + """Returns this class in a JSON structure which can then be serialized + and later be restored from.""" + return { + "start": self.get_start().to_json(), + "end": self.get_end().to_json(), + } + + +class LooppointRegion: + """A data structure to store Looppoint region information. + + **Note**: This is not intended to be a user-facing class. The classes + `LooppointJsonLoader` and `LooppointCSVLoader` can be used to load + and restore Simpoint data. + """ + + def __init__( + self, + simulation: LooppointSimulation, + multiplier: float, + warmup: Optional[LooppointRegionWarmup] = None, + ): + """ + :param simulation: The simulation information for this Looppoint + region. + :param multiplier: The multiplier for this Looppoint region. + :param warmup: The warmup information for this Looppoint region. + Optional. 
+ """ + self._simulation = simulation + self._multiplier = multiplier + self._warmup = warmup + + def get_simulation(self) -> LooppointSimulation: + """Returns the simulation region information.""" + return self._simulation + + def get_multiplier(self) -> float: + """Returns the multiplier.""" + return self._multiplier + + def get_warmup(self) -> Optional[LooppointRegionWarmup]: + """If set, returns the warmup region information. Otherwise None.""" + return self._warmup + + def get_pc_count_pairs(self) -> List[PcCountPair]: + """Returns the PC count pairs for this Looppoint region.""" + pc_count_pairs = self.get_simulation().get_pc_count_pairs() + if self.get_warmup(): + pc_count_pairs.extend(self.get_warmup().get_pc_count_pairs()) + return pc_count_pairs + + def update_relatives_counts(self, manager: PcCountTrackerManager) -> None: + """Updates the relative counds of this Looppoint region.""" + self.get_simulation().update_relatives_counts( + manager=manager, include_start=bool(self.get_warmup()) + ) + + def get_start(self) -> PcCountPair: + """Returns the correct starting PcCountPair for this Looppoint + region.""" + if self.get_warmup(): + return self.get_warmup().get_start() + return self.get_simulation().get_start().get_pc_count_pair() + + def to_json(self) -> Dict: + """Returns this class in a JSON structure which can then be serialized + and later be restored from.""" + to_return = { + "simulation": self.get_simulation().to_json(), + "multiplier": self.get_multiplier(), + } + if self.get_warmup(): + to_return["warmup"] = self.get_warmup().to_json() + return to_return + + +class Looppoint: + """Stores all the Looppoint information for a gem5 workload.""" + + def __init__(self, regions: Dict[Union[str, int], LooppointRegion]): + """ + :param regions: A dictionary mapping the region_ids with the + LooppointRegions. 
+ """ + self._regions = regions + self._manager = PcCountTrackerManager() + self._manager.targets = self.get_targets() + + def set_target_region_id(self, region_id: Union[str, int]) -> None: + """There are use-cases where we want to obtain a looppoint data + structure containing a single target region via its ID. This function + will remove all irrelevant regions.""" + + if region_id not in self._regions: + raise Exception(f"Region ID '{region_id}' cannot be found.") + + to_remove = [rid for rid in self._regions if rid is not region_id] + for rid in to_remove: + del self._regions[rid] + + self._manager.targets = self.get_targets() + + def get_manager(self) -> PcCountTrackerManager: + """Returns the PcCountTrackerManager for this Looppoint data + structure.""" + return self._manager + + def get_regions(self) -> Dict[Union[int, str], LooppointRegion]: + """Returns the regions for this Looppoint data structure.""" + return self._regions + + def setup_processor( + self, + processor: "AbstractProcessor", + ) -> None: + """ + A function is used to setup a PC tracker in all the cores and + connect all the tracker to the PC tracker manager to perform + multithread PC tracking. + + :param processor: The processor used in the simulation configuration. + """ + for core in processor.get_cores(): + core.add_pc_tracker_probe(self.get_targets(), self.get_manager()) + + def update_relatives_counts(self) -> None: + """ + Updates the relative count for restore usage. The new relative count + will be stored in relevant data structures. + """ + current_pair = self.get_current_pair() + region_start_map = self.get_region_start_id_map() + if current_pair in region_start_map: + region_id = region_start_map[current_pair] + self.get_regions()[region_id].update_relatives_counts( + manager=self.get_manager() + ) + + def get_current_region(self) -> Optional[Union[str, int]]: + """Returns the region id if the current PC Count pair if significant + (e.g. 
beginning of the checkpoint), otherwise, it returns None to + indicate the current PC Count pair is not significant. + """ + current_pair = self.get_current_pair() + region_start_map = self.get_region_start_id_map() + if current_pair in region_start_map: + return region_start_map[current_pair] + return None + + def get_current_pair(self) -> PcCountPair: + """This function returns the current PC Count pair.""" + return self.get_manager().getCurrentPcCountPair() + + def get_region_start_id_map(self) -> Dict[PcCountPair, Union[int, str]]: + """Returns the starting PcCountPairs mapped to the corresponding region + IDs. This is a helper function for quick mapping of PcCountPairs to + region IDs.""" + + regions = {} + for rid in self.get_regions(): + regions[self.get_regions()[rid].get_start()] = rid + + return regions + + def get_targets(self) -> List[PcCountPair]: + """Returns the complete list of target PcCountPairs. That is, the + PcCountPairs each region starts with as well as the relevant warmup + intervals.""" + targets = [] + for rid in self.get_regions(): + targets.extend(self.get_regions()[rid].get_pc_count_pairs()) + + return targets + + def to_json(self) -> Dict[Union[int, str], Dict]: + """Returns this data-structure as a dictionary for serialization via + the `output_json_file` function.""" + to_return = {} + for region_id in self.get_regions(): + to_return[region_id] = self.get_regions()[region_id].to_json() + return to_return + + def output_json_file( + self, + input_indent: int = 4, + filepath: str = os.path.join(m5.options.outdir, "looppoint.json"), + ) -> Dict[int, Dict]: + """ + This function is used to output the _json_file into a json file + + :param input_indent: the indent value of the json file + :param filepath: the path of the output json file + """ + with open(filepath, "w") as file: + json.dump(self.to_json(), file, indent=input_indent) + + +class LooppointCsvLoader(Looppoint): + """This class will create a Looppoint data structure from data 
extracted + from a Looppoint pinpoints file.""" + + def __init__( + self, + pinpoints_file: Union[Path, str], + region_id: Optional[Union[str, int]] = None, + ): + """ + :params pinpoints_file: The pinpoints file in which the data is to be + expected. + :params region_id: If set, will only load the specified region data. + Otherwise, all region info is loaded. Is used when restoring to a + particular region. + """ + + regions = {} + warmups = {} + + _path = ( + pinpoints_file + if isinstance(pinpoints_file, Path) + else Path(pinpoints_file) + ) + + # This section is hard-coded to parse the data in the csv file. + # The csv file is assumed to have a constant format. + with open(_path, newline="") as csvfile: + reader = csv.reader(csvfile, delimiter=" ", quotechar="|") + for row in reader: + if len(row) > 1: + if row[0] == "cluster": + # if it is a simulation region + line = row[4].split(",") + + rid = int(line[2]) + + region_start = LooppointRegionPC( + pc=int(line[3], 16), + globl=int(line[6]), + # From the CSV's I've observed, the start relative + # value is never set, while the end is always set. + # Given limited information, I can only determine + # this is a rule of how the CSV is setup. + relative=None, + ) + + region_end = LooppointRegionPC( + pc=int(line[7], 16), + globl=int(line[10]), + relative=int(line[11]), + ) + + simulation = LooppointSimulation( + start=region_start, end=region_end + ) + + multiplier = float(line[14]) + + region = LooppointRegion( + simulation=simulation, multiplier=multiplier + ) + + regions[rid] = region + + elif row[0] == "Warmup": + line = row[3].split(",") + rid = int(line[0]) + start = PcCountPair(int(line[3], 16), int(line[6])) + end = PcCountPair(int(line[7], 16), int(line[10])) + + warmup = LooppointRegionWarmup(start=start, end=end) + warmups[rid] = warmup + + for rid in warmups: + if rid not in regions: + raise Exception( + "Warmup region ID '{rid}' does not have a " + "corresponding region." 
+ ) + regions[rid]._warmup = warmups[rid] + + super().__init__(regions=regions) + + if region_id: + self.set_target_region_id(region_id=region_id) + + +class LooppointJsonLoader(Looppoint): + """This class will create a generate a Looppoint data structure from data + extracted from a Looppoint json file.""" + + def __init__( + self, + looppoint_file: Union[str, Path], + region_id: Optional[Union[str, int]] = None, + ) -> None: + """ + :param looppoint_file: a json file generated by gem5 that has all the + LoopPoint data information + :params region_id: If set, will only load the specified region data. + Otherwise, all region info is loaded. Is used when restoring to a + particular region. + """ + + _path = ( + looppoint_file + if isinstance(looppoint_file, Path) + else Path(looppoint_file) + ) + + regions = {} + with open(_path) as file: + json_contents = json.load(file) + for rid in json_contents: + + start_pc = int(json_contents[rid]["simulation"]["start"]["pc"]) + start_globl = int( + json_contents[rid]["simulation"]["start"]["global"] + ) + start_relative = ( + int(json_contents[rid]["simulation"]["start"]["relative"]) + if "relative" in json_contents[rid]["simulation"]["start"] + else None + ) + start = LooppointRegionPC( + pc=start_pc, + globl=start_globl, + relative=start_relative, + ) + + end_pc = int(json_contents[rid]["simulation"]["end"]["pc"]) + end_globl = int( + json_contents[rid]["simulation"]["end"]["global"] + ) + end_relative = ( + int(json_contents[rid]["simulation"]["end"]["relative"]) + if "relative" in json_contents[rid]["simulation"]["end"] + else None + ) + end = LooppointRegionPC( + pc=end_pc, + globl=end_globl, + relative=end_relative, + ) + simulation = LooppointSimulation(start=start, end=end) + multiplier = float(json_contents[rid]["multiplier"]) + warmup = None + if "warmup" in json_contents[rid]: + start = PcCountPair( + json_contents[rid]["warmup"]["start"]["pc"], + json_contents[rid]["warmup"]["start"]["count"], + ) + end = 
PcCountPair( + json_contents[rid]["warmup"]["end"]["pc"], + json_contents[rid]["warmup"]["end"]["count"], + ) + warmup = LooppointRegionWarmup(start=start, end=end) + + regions[rid] = LooppointRegion( + simulation=simulation, multiplier=multiplier, warmup=warmup + ) + + super().__init__(regions=regions) + if region_id: + self.set_target_region_id(region_id=region_id) diff --git a/src/python/gem5/simulate/exit_event_generators.py b/src/python/gem5/simulate/exit_event_generators.py index 82eba17543..37998d3a9b 100644 --- a/src/python/gem5/simulate/exit_event_generators.py +++ b/src/python/gem5/simulate/exit_event_generators.py @@ -29,7 +29,7 @@ import m5.stats from ..components.processors.abstract_processor import AbstractProcessor from ..components.processors.switchable_processor import SwitchableProcessor from ..resources.resource import SimpointResource -from gem5.utils.looppoint import LoopPoint +from gem5.resources.looppoint import Looppoint from m5.util import warn from pathlib import Path @@ -172,7 +172,7 @@ def simpoints_save_checkpoint_generator( def looppoint_save_checkpoint_generator( checkpoint_dir: Path, - looppoint: LoopPoint, + looppoint: Looppoint, update_relatives: bool = True, exit_when_empty: bool = True, ): @@ -203,7 +203,7 @@ def looppoint_save_checkpoint_generator( # will return an integer greater than 0. By significant PC Count pair, # it means the PC Count pair that indicates where to take the # checkpoint at. This is determined in the LoopPoint module. - if region != -1: + if region: if update_relatives: looppoint.update_relatives_counts() m5.checkpoint((checkpoint_dir / f"cpt.Region{region}").as_posix()) diff --git a/src/python/gem5/utils/looppoint.py b/src/python/gem5/utils/looppoint.py deleted file mode 100644 index 8e01e3030f..0000000000 --- a/src/python/gem5/utils/looppoint.py +++ /dev/null @@ -1,384 +0,0 @@ -# Copyright (c) 2022 The Regents of the University of California -# All rights reserved. 
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer; -# redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution; -# neither the name of the copyright holders nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- - -from m5.util import fatal -from m5.params import PcCountPair -from pathlib import Path -from typing import List, Dict, Tuple -from gem5.components.processors.abstract_processor import AbstractProcessor -from m5.objects import PcCountTrackerManager -import csv -import re -import json -import m5 -import os - - -class LoopPoint: - """ - This LoopPoint class is used to manage the information needed for LoopPoint - in workload - """ - - def __init__( - self, - targets: List[PcCountPair], - regions: Dict[PcCountPair, int], - json_file: Dict[int, Dict], - ) -> None: - """ - :param targets: a list of PcCountPair that are used to generate exit - event at when the PcCountTrackerManager encounter this PcCountPair in - execution - :param regions: a dictionary used to find the corresponding region id - for the significant PcCountPair. This is mainly used to ensure - checkpoints are taken in the correct PcCountPair or relative counts are - updated at the correct count - :param json_file: all the LoopPoint data including relative counts and - multiplier are stored in this parameter. It can be outputted as a json - file. - """ - - self._manager = PcCountTrackerManager() - self._manager.targets = targets - self._targets = targets - self._regions = regions - self._json_file = json_file - - def setup_processor( - self, - processor: AbstractProcessor, - ) -> None: - """ - This function is used to setup a PC tracker in all the cores and - connect all the tracker to the PC tracker manager to perform - multithread PC tracking - :param processor: the processor used in the simulation configuration - """ - for core in processor.get_cores(): - core.add_pc_tracker_probe(self._targets, self._manager) - - def update_relatives_counts(self) -> None: - """ - This function is used to update the relative count for restore used. - The new relative count will be stored in the _json_file and can be - outputted into a json file by calling the output_json_file function. 
- """ - current_pair = self._manager.getCurrentPcCountPair() - if current_pair in self._regions: - rid = self._regions[current_pair] - region = self._json_file[rid]["simulation"] - if "warmup" in self._json_file[rid]: - # if this region has a warmup interval, - # then update the relative count for the - # start of the simulation region - start = region["start"]["pc"] - temp = region["start"]["global"] - self._manager.getPcCount( - start - ) - self._json_file[rid]["simulation"]["start"]["relative"] = int( - temp - ) - end = region["end"]["pc"] - temp = region["end"]["global"] - self._manager.getPcCount(end) - self._json_file[rid]["simulation"]["end"]["relative"] = int(temp) - - def output_json_file( - self, - input_indent: int = 4, - filepath: str = os.path.join(m5.options.outdir, "outdir.json"), - ) -> Dict[int, Dict]: - """ - This function is used to output the _json_file into a json file - :param input_indent: the indent value of the json file - :param filepath: the path of the output file - """ - with open(filepath, "w") as file: - json.dump(self._json_file, file, indent=input_indent) - - def get_current_region(self) -> int: - """ - This function returns the region id if the current PC Count pair is - significant(e.x. 
class LoopPointCheckpoint(LoopPoint):
    def __init__(self, looppoint_file: Path, if_csv: bool) -> None:
        """
        This class is specifically designed to take in the LoopPoint data file
        and generate the information needed to take checkpoints for LoopPoint
        regions (warmup region + simulation region).

        :param looppoint_file: the path of the LoopPoint data file
        :param if_csv: True if the file is a csv file, False if the file is a
        json file
        """

        _json_file = {}
        _targets = []
        _region_id = {}

        # Both profilers fill the three containers above in place; nothing is
        # returned from them.
        if if_csv:
            self.profile_csv(looppoint_file, _targets, _json_file, _region_id)
        else:
            self.profile_json(looppoint_file, _targets, _json_file, _region_id)

        super().__init__(
            _targets,
            _region_id,
            _json_file,
        )

    @staticmethod
    def _parse_csv_interval(
        fields: List[str], count_key: str
    ) -> Tuple[PcCountPair, PcCountPair, Dict]:
        """
        Build the start/end PcCountPair objects and the dictionary entry for
        one interval described by a comma-split csv record.

        The start PC is read from field 3 and the end PC from field 7 (both
        hexadecimal); the start count is read from field 6 and the end count
        from field 10 (both decimal).

        :param fields: the comma-separated fields of the record
        :param count_key: the dictionary key the counts are stored under
        ("global" for simulation intervals, "count" for warmup intervals)
        :return: the start pair, the end pair and the interval dictionary
        """
        start_pc, start_count = int(fields[3], 16), int(fields[6])
        end_pc, end_count = int(fields[7], 16), int(fields[10])
        interval = {
            "start": {"pc": start_pc, count_key: start_count},
            "end": {"pc": end_pc, count_key: end_count},
        }
        return (
            PcCountPair(start_pc, start_count),
            PcCountPair(end_pc, end_count),
            interval,
        )

    @staticmethod
    def _checkpoint_pair(region: Dict) -> PcCountPair:
        """
        Return the PC Count pair at which the checkpoint of a region should be
        taken: the start of the warmup interval if the region has one,
        otherwise the start of the simulation region.

        :param region: the dictionary describing one region
        """
        if "warmup" in region:
            return PcCountPair(
                region["warmup"]["start"]["pc"],
                region["warmup"]["start"]["count"],
            )
        return PcCountPair(
            region["simulation"]["start"]["pc"],
            region["simulation"]["start"]["global"],
        )

    def profile_csv(
        self,
        looppoint_file_path: Path,
        targets: List[PcCountPair],
        json_file: Dict[int, Dict],
        region_id: Dict[PcCountPair, int],
    ) -> None:
        """
        This function profiles the csv LoopPoint data file into three variables
        to take correct checkpoints for LoopPoint. All three containers are
        filled in place.

        :param looppoint_file_path: the path of the LoopPoint data file
        :param targets: a list of PcCountPair
        :param json_file: a dictionary for all the LoopPoint data
        :param region_id: a dictionary for all the significant PcCountPair and
        its corresponding region id
        """

        # This section is hard-coded to parse the data in the csv file.
        # The csv file is assumed to have a constant format.
        with open(looppoint_file_path, newline="") as csvfile:
            reader = csv.reader(csvfile, delimiter=" ", quotechar="|")
            for row in reader:
                if len(row) <= 1:
                    continue

                if row[0] == "cluster":
                    # a simulation region; its region id is field 2
                    fields = row[4].split(",")
                    start, end, interval = self._parse_csv_interval(
                        fields, "global"
                    )
                    # setdefault keeps a warmup entry that was stored for
                    # this region by an earlier row
                    region = json_file.setdefault(int(fields[2]), {})
                    region["simulation"] = interval
                    region["multiplier"] = float(fields[14])
                elif row[0] == "Warmup":
                    # a warmup interval; its region id is field 0
                    fields = row[3].split(",")
                    start, end, interval = self._parse_csv_interval(
                        fields, "count"
                    )
                    region = json_file.setdefault(int(fields[0]), {})
                    region["warmup"] = interval
                else:
                    continue

                # store all the PC Count pairs from the file to the targets
                # list
                targets.append(start)
                targets.append(end)

        # iterate over all the regions and record the significant PC Count
        # pair (where the checkpoint is taken) of each region
        for rid, region in json_file.items():
            region_id[self._checkpoint_pair(region)] = rid

    def profile_json(
        self,
        looppoint_file_path: Path,
        targets: List[PcCountPair],
        json_file: Dict[int, Dict],
        region_id: Dict[PcCountPair, int],
    ) -> None:
        """
        This function profiles the json LoopPoint data file into three
        variables to take correct checkpoints for LoopPoint. All three
        containers are filled in place.

        :param looppoint_file_path: the path of the LoopPoint data file
        :param targets: a list of PcCountPair
        :param json_file: a dictionary for all the LoopPoint data
        :param region_id: a dictionary for all the significant PcCountPair and
        its corresponding region id
        """

        with open(looppoint_file_path, encoding="utf-8") as file:
            loaded = json.load(file)

        # Fill the caller's dictionary in place. Rebinding the parameter
        # (`json_file = json.load(file)`) would leave the caller's dictionary
        # empty, so the loaded data would never reach LoopPoint.__init__.
        json_file.update(loaded)

        for rid, region in loaded.items():
            # store all PC Count pairs in the file into the targets list
            sim_start = PcCountPair(
                region["simulation"]["start"]["pc"],
                region["simulation"]["start"]["global"],
            )
            targets.append(sim_start)
            sim_end = PcCountPair(
                region["simulation"]["end"]["pc"],
                region["simulation"]["end"]["global"],
            )
            targets.append(sim_end)

            if "warmup" in region:
                # if there is a warmup in the region, then the checkpoint
                # should be taken at the start of the warmup interval
                start = PcCountPair(
                    region["warmup"]["start"]["pc"],
                    region["warmup"]["start"]["count"],
                )
                targets.append(start)
                warmup_end = PcCountPair(
                    region["warmup"]["end"]["pc"],
                    region["warmup"]["end"]["count"],
                )
                targets.append(warmup_end)
            else:
                # if there is not a warmup interval in the region, then the
                # checkpoint should be taken at the start of the simulation
                # region
                start = sim_start
            region_id[start] = rid


class LoopPointRestore(LoopPoint):
    def __init__(self, looppoint_file: Path, region_id: int) -> None:
        """
        This class is specifically designed to take in the LoopPoint data file
        and generate the information needed to restore a checkpoint taken by
        the LoopPointCheckpoint.

        :param looppoint_file: a json file generated by gem5 that has all the
        LoopPoint data information
        :param region_id: The region ID we will be restoring to.
        """

        with open(looppoint_file, encoding="utf-8") as file:
            json_file = json.load(file)

        targets, regions = self.get_region(
            json_file=json_file, region_id=region_id
        )

        super().__init__(targets=targets, regions=regions, json_file=json_file)

    def get_region(
        self, json_file: Dict[int, Dict], region_id: int
    ) -> Tuple[List[PcCountPair], Dict[PcCountPair, int]]:
        """
        This function collects the PC Count pairs needed to restore the region
        `region_id`, using the "relative" counts stored in the LoopPoint data.

        The end of the simulation region is always a target. The start of the
        simulation region is a target only if the region has a warmup
        interval.

        :param json_file: a dictionary for all the LoopPoint data
        :param region_id: the region ID we will be restoring to
        :return: the list of target PcCountPair and a dictionary mapping each
        of them to `region_id`
        """
        to_return_region = {}
        to_return_targets = []

        # A dictionary produced by json.load only has string keys, so an
        # integer region id is also looked up under its string form.
        region_key = region_id
        if region_key not in json_file and str(region_key) in json_file:
            region_key = str(region_key)

        if region_key not in json_file:
            # if the region id does not exist in the LoopPoint data
            # file raise a fatal message
            fatal(f"{region_id} is not a valid region\n")
        region = json_file[region_key]

        if "warmup" in region:
            if "relative" not in region["simulation"]["start"]:
                # if there are not relative counts for the PC Count
                # pair then it means there is not enough information to
                # restore this checkpoint
                fatal(f"region {region_id} doesn't have relative count info\n")
            start = PcCountPair(
                region["simulation"]["start"]["pc"],
                region["simulation"]["start"]["relative"],
            )
            to_return_region[start] = region_id
            to_return_targets.append(start)

        if "relative" not in region["simulation"]["end"]:
            fatal(f"region {region_id} doesn't have relative count info\n")
        end = PcCountPair(
            region["simulation"]["end"]["pc"],
            region["simulation"]["end"]["relative"],
        )
        to_return_region[end] = region_id
        to_return_targets.append(end)

        return to_return_targets, to_return_region