diff --git a/components_library/boards/simple_board.py b/components_library/boards/simple_board.py index 8ecaefcdec..86e6890da6 100644 --- a/components_library/boards/simple_board.py +++ b/components_library/boards/simple_board.py @@ -24,6 +24,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from components_library.resources.resource import AbstractResource from m5.objects import ( AddrRange, SrcClockDomain, @@ -145,7 +146,7 @@ class SimpleBoard(AbstractBoard): self.mem_ranges = [AddrRange(memory.get_size())] memory.set_memory_range(self.mem_ranges) - def set_workload(self, binary: str) -> None: + def set_workload(self, binary: AbstractResource) -> None: """Set up the system to run a specific binary. **Limitations** @@ -153,11 +154,11 @@ class SimpleBoard(AbstractBoard): * Dynamically linked executables are partially supported when the host ISA and the simulated ISA are the same. - :param binary: The path on the *host* to the binary to run in gem5. + :param binary: The resource encapsulating the binary to be run. """ - self.workload = SEWorkload.init_compatible(binary) + self.workload = SEWorkload.init_compatible(binary.get_local_path()) process = Process() - process.cmd = [binary] + process.cmd = [binary.get_local_path()] self.get_processor().get_cores()[0].set_workload(process) diff --git a/components_library/boards/x86_board.py b/components_library/boards/x86_board.py index dd9ad13af0..64703799cd 100644 --- a/components_library/boards/x86_board.py +++ b/components_library/boards/x86_board.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from components_library.resources.resource import AbstractResource from components_library.utils.override import overrides from components_library.boards.abstract_board import AbstractBoard from components_library.isas import ISA @@ -273,7 +274,10 @@ class X86Board(SimpleBoard): def set_workload( - self, kernel: str, disk_image: str, command: Optional[str] = None + self, + kernel: AbstractResource, + disk_image: AbstractResource, + command: Optional[str] = None, ): """Setup the full system files @@ -287,14 +291,14 @@ class X86Board(SimpleBoard): * Only supports a Linux kernel * Disk must be configured correctly to use the command option - :param kernel: The compiled kernel binary - :param disk_image: A disk image containing the OS data. The first - partition should be the root partition. + :param kernel: The compiled kernel binary resource + :param disk_image: A disk image resource containing the OS data. The + first partition should be the root partition. :param command: The command(s) to run with bash once the OS is booted """ # Set the Linux kernel to use. - self.workload.object_file = kernel + self.workload.object_file = kernel.get_local_path() # Options specified on the kernel command line. self.workload.command_line = " ".join( @@ -312,7 +316,7 @@ class X86Board(SimpleBoard): ide_disk.image = CowDiskImage( child=RawDiskImage(read_only=True), read_only=False ) - ide_disk.image.child.image_file = disk_image + ide_disk.image.child.image_file = disk_image.get_local_path() # Attach the SimObject to the system. self.pc.south_bridge.ide.disks = [ide_disk] diff --git a/components_library/resources/downloader.py b/components_library/resources/downloader.py new file mode 100644 index 0000000000..51637243be --- /dev/null +++ b/components_library/resources/downloader.py @@ -0,0 +1,255 @@ +# Copyright (c) 2021 The Regents of the University of California +# All rights reserved. 
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+import urllib.request
+import hashlib
+import os
+import shutil
+import gzip
+import hashlib
+import base64
+from typing import List, Dict, Optional
+
+from ..utils.filelock import FileLock
+
+"""
+This Python module contains functions used to download, list, and obtain
+information about resources from resources.gem5.org.
+"""
+
+
+def _get_resources_json_uri() -> str:
+    """Returns the URI from which the Resources JSON is fetched."""
+    # TODO: This is hardcoded to develop. This will need to be updated for
+    # each release to the stable branch.
+    return (
+        "https://gem5.googlesource.com/public/gem5-resources/"
+        + "+/refs/heads/develop/resources.json?format=TEXT"
+    )
+
+
+def _get_resources_json() -> Dict:
+    """
+    Gets the Resources JSON.
+
+    :returns: The Resources JSON (as a Python Dictionary).
+    """
+
+    # Note: Google Source does not properly support obtaining files as raw
+    # text. Therefore when we open the URL we receive the JSON in base64
+    # format. Conversion is needed before it can be loaded.
+    with urllib.request.urlopen(_get_resources_json_uri()) as url:
+        return json.loads(base64.b64decode(url.read()).decode("utf-8"))
+
+
+def _get_resources(resources_group: List[Dict]) -> Dict[str, Dict]:
+    """
+    A recursive function to get all the resources.
+
+    :param resources_group: A list of artifact and group JSON objects.
+
+    :returns: A dictionary of resource names to the resource JSON objects.
+    """
+
+    to_return = {}
+    for resource in resources_group:
+        if resource["type"] == "artifact":
+            # If the type is "artifact" then we add it directly to the map
+            # after a check that the name is unique.
+            if resource["name"] in to_return:
+                raise Exception(
+                    "Error: Duplicate artifact with name '{}'.".format(
+                        resource["name"]
+                    )
+                )
+            to_return[resource["name"]] = resource
+        elif resource["type"] == "group":
+            # If it's a group we recurse into its contents. We then check to
+            # see if any of the returned names duplicate ones already seen.
+            new_map = _get_resources(resource["contents"])
+            intersection = set(new_map.keys()).intersection(to_return.keys())
+            if len(intersection) > 0:
+                # Note: if this error is received it's likely an error with
+                # the resources.json file. The resources names need to be
+                # unique keys.
+                raise Exception(
+                    "Error: Duplicate artifacts with names: {}.".format(
+                        str(intersection)
+                    )
+                )
+            to_return.update(new_map)
+        else:
+            raise Exception(
+                "Error: Unknown type '{}'.".format(resource["type"])
+            )
+
+    return to_return
+
+
+def _get_md5(file: str) -> str:
+    """
+    Gets the md5 of a file.
+
+    :param file: The file needing an md5 value.
+
+    :returns: The md5 of the input file.
+    """
+
+    # Note: The file is hashed in chunks because passing the whole content
+    # of a large file (such as a disk image) to `hashlib.md5()` at once
+    # returns malloc errors. The `with` block guarantees the file is closed.
+    md5_object = hashlib.md5()
+    block_size = 128 * md5_object.block_size
+    with open(file, "rb") as a_file:
+        chunk = a_file.read(block_size)
+        while chunk:
+            md5_object.update(chunk)
+            chunk = a_file.read(block_size)
+
+    return md5_object.hexdigest()
+
+
+def _download(url: str, download_to: str) -> None:
+    """
+    Downloads a file.
+
+    :param url: The URL of the file to download.
+
+    :param download_to: The location the downloaded file is to be stored.
+    """
+
+    # TODO: This whole setup will only work for single files we can get via
+    # wget. We also need to support git clones going forward.
+    urllib.request.urlretrieve(url, download_to)
+
+
+def list_resources() -> List[str]:
+    """
+    Lists all available resources by name.
+
+    :returns: A list of resources by name.
+    """
+    return list(_get_resources(_get_resources_json()["resources"]).keys())
+
+
+def get_resources_json_obj(resource_name: str) -> Dict:
+    """
+    Get a JSON object of a specified resource.
+
+    :param resource_name: The name of the resource.
+
+    :returns: The JSON object (in the form of a dictionary).
+
+    :raises Exception: An exception is raised if the specified resource does
+    not exist.
+    """
+    artifact_map = _get_resources(_get_resources_json()["resources"])
+
+    if resource_name not in artifact_map:
+        raise Exception(
+            "Error: Resource with name '{}' does not exist".format(
+                resource_name
+            )
+        )
+
+    return artifact_map[resource_name]
+
+
+def get_resource(
+    resource_name: str,
+    to_path: str,
+    unzip: Optional[bool] = True,
+    override: Optional[bool] = False,
+) -> None:
+    """
+    Obtains a gem5 resource and stores it to a specified location. If the
+    specified resource is already at the location, no action is taken.
+
+    :param resource_name: The resource to be obtained.
+
+    :param to_path: The location in the file system the resource is to be
+    stored. The filename should be included.
+
+    :param unzip: If true, gzipped resources will be unzipped prior to saving
+    to `to_path`. True by default.
+
+    :param override: If a resource is present with an incorrect hash (e.g.,
+    an outdated version of the resource is present), `get_resource` will delete
+    this local resource and re-download it if this parameter is True. False by
+    default.
+
+    :raises Exception: An exception is thrown if a file is already present at
+    `to_path` but it does not have the correct md5 sum. An exception will also
+    be thrown if a directory is present at `to_path`.
+    """
+
+    # We apply a lock for a specific resource. This is to avoid circumstances
+    # where multiple instances of gem5 are running and trying to obtain the
+    # same resources at once. The timeout here is somewhat arbitrarily put at
+    # 15 minutes. Most resources should be downloaded and decompressed in
+    # this timeframe, even on the most constrained of systems.
+    with FileLock("{}.lock".format(to_path), timeout=900):
+
+        resource_json = get_resources_json_obj(resource_name)
+
+        if os.path.exists(to_path):
+
+            if not os.path.isfile(to_path):
+                raise Exception(
+                    "There is a directory at '{}'.".format(to_path)
+                )
+
+            if _get_md5(to_path) == resource_json["md5sum"]:
+                # In this case, the file has already been downloaded; there
+                # is no need to do so again.
+                return
+            elif override:
+                os.remove(to_path)
+            else:
+                raise Exception(
+                    "There is already a file present at '{}' but "
+                    "its md5 value is invalid.".format(to_path)
+                )
+
+        download_dest = to_path
+        run_unzip = unzip and resource_json["is_zipped"].lower() == "true"
+        if run_unzip:
+            download_dest += ".gz"
+
+        # TODO: Might be nice to have some kind of download status bar here.
+        # TODO: There might be a case where this should be silenced.
+        print("'{}' not found locally. Downloading...".format(resource_name))
+        _download(url=resource_json["url"], download_to=download_dest)
+        print("Finished downloading '{}'.".format(resource_name))
+
+        if run_unzip:
+            print("Decompressing '{}'...".format(resource_name))
+            with gzip.open(download_dest, "rb") as f:
+                with open(to_path, "wb") as o:
+                    shutil.copyfileobj(f, o)
+            os.remove(download_dest)
+            print("Finished decompressing '{}'.".format(resource_name))
diff --git a/components_library/resources/resource.py b/components_library/resources/resource.py
new file mode 100644
index 0000000000..4e7a459328
--- /dev/null
+++ b/components_library/resources/resource.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2021 The Regents of the University of California
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from abc import ABCMeta
+import os
+
+from .downloader import get_resource
+
+from typing import Optional
+
+"""
+A Resource object encapsulates a gem5 resource. Resources are items needed to
+run a simulation, such as a disk image, kernel, or binary. The gem5 project
+provides pre-built resources, with sources, at <resources.gem5.org>.
+
+The purpose of this encapsulation is two fold:
+
+1. It allows automatic retrieval of gem5 resources. E.g., specifying a resource
+   which is not local will initiate a download.
+2. It provides a location where code may be added to record the resources used
+   within a simulation. At present this is a TODO work-item.
+"""
+
+
+class AbstractResource(metaclass=ABCMeta):
+    """
+    The base class of all resources. It holds the resource's local path.
+    """
+
+    def __init__(self, local_path: str):
+        self._local_path = local_path
+
+    def get_local_path(self) -> str:
+        return self._local_path
+
+
+class CustomResource(AbstractResource):
+    """
+    A custom gem5 resource. This can be used to encapsulate a resource provided
+    by a gem5 user as opposed to one available within the gem5 resources
+    repository.
+    """
+
+    def __init__(self, local_path: str):
+        """
+        :param local_path: The path of the resource on the host system.
+        """
+        super().__init__(local_path=local_path)
+
+
+class Resource(AbstractResource):
+    """
+    An official gem5 resource as hosted within our gem5 resources repository
+    (<resources.gem5.org>).
+
+    A user need only specify the name of the resource during construction. The
+    resource will be downloaded if needed. A list of available resources can
+    be obtained via `downloader.list_resources()`.
+    """
+
+    def __init__(
+        self,
+        resource_name: str,
+        resource_directory: Optional[str] = None,
+        override: Optional[bool] = False,
+    ):
+        """
+        :param resource_name: The name of the gem5 resource.
+        :param resource_directory: The location of the directory in which the
+        resource is to be stored. By default the path is `resource_name`.
+        :param override: If the resource is present, but does not have the
+        correct md5 value, the resource will be deleted and re-downloaded if
+        this value is True. Otherwise an exception will be thrown. False by
+        default.
+        """
+
+        if resource_directory is not None:
+            os.makedirs(resource_directory, exist_ok=True)
+            to_path = os.path.join(resource_directory, resource_name)
+        else:
+            to_path = resource_name
+
+        super().__init__(local_path=to_path)
+        get_resource(
+            resource_name=resource_name, to_path=to_path, override=override
+        )
diff --git a/components_library/utils/filelock.py b/components_library/utils/filelock.py
new file mode 100644
index 0000000000..82e1122bf9
--- /dev/null
+++ b/components_library/utils/filelock.py
@@ -0,0 +1,111 @@
+# Copyright (c) 2009, Evan Fosmark
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# * Neither the name of the nor the
+# names of its contributors may be used to endorse or promote products
+# derived from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import time
+import errno
+
+
+class FileLockException(Exception):
+    """Raised by `FileLock.acquire` when the lock cannot be obtained."""
+
+
+class FileLock(object):
+    """A file locking mechanism that has context-manager support so
+    you can use it in a with statement. This should be relatively cross
+    compatible as it doesn't rely on msvcrt or fcntl for the locking.
+    """
+
+    def __init__(self, file_name, timeout=10, delay=0.05):
+        """Prepare the file locker. Specify the file to lock and optionally
+        the maximum timeout and the delay between each attempt to lock.
+        """
+        if timeout is not None and delay is None:
+            raise ValueError(
+                "If timeout is not None, then delay must not be None."
+            )
+        self.is_locked = False
+        self.fd = None
+        self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name)
+        self.file_name = file_name
+        self.timeout = timeout
+        self.delay = delay
+
+    def acquire(self):
+        """Acquire the lock, if possible. If the lock is in use, it checks
+        again every `delay` seconds. It does this until it either gets the
+        lock or exceeds `timeout` number of seconds, in which case it throws
+        an exception. If `timeout` is None, it throws without retrying.
+        """
+        start_time = time.time()
+        while True:
+            try:
+                self.fd = os.open(
+                    self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR
+                )
+                self.is_locked = True  # only set once the lockfile is ours
+                break
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise
+                if self.timeout is None:
+                    raise FileLockException(
+                        "Could not acquire lock on {}".format(self.file_name)
+                    )
+                if (time.time() - start_time) >= self.timeout:
+                    raise FileLockException("Timeout occurred.")
+                time.sleep(self.delay)
+
+    def release(self):
+        """Get rid of the lock by deleting the lockfile.
+        When working in a `with` statement, this gets automatically
+        called at the end.
+        """
+        if self.is_locked:
+            os.close(self.fd)
+            os.unlink(self.lockfile)
+            self.is_locked = False
+
+    def __enter__(self):
+        """Activated when used in the with statement.
+        Should automatically acquire a lock to be used in the with block.
+        """
+        if not self.is_locked:
+            self.acquire()
+        return self
+
+    def __exit__(self, type, value, traceback):
+        """Activated at the end of the with statement.
+        It automatically releases the lock if it is locked.
+        """
+        if self.is_locked:
+            self.release()
+
+    def __del__(self):
+        """Make sure that the FileLock instance doesn't leave a lockfile
+        lying around. `is_locked` is missing if `__init__` raised early.
+        """
+        if getattr(self, "is_locked", False):
+            self.release()
diff --git a/configs/example/components-library/boot_exit_disk_run.py b/configs/example/components-library/boot_exit_disk_run.py
index 67b0feabfa..ae8f87ca8e 100644
--- a/configs/example/components-library/boot_exit_disk_run.py
+++ b/configs/example/components-library/boot_exit_disk_run.py
@@ -60,11 +60,9 @@ from components_library.processors.simple_processor import SimpleProcessor
 from components_library.processors.cpu_types import CPUTypes
 from components_library.isas import ISA
 from components_library.coherence_protocol import CoherenceProtocol
+from components_library.resources.resource import Resource
 
 import os
-import subprocess
-import gzip
-import shutil
 
 # Run a check to ensure the right version of gem5 is being used.
 if (
@@ -110,31 +108,10 @@ motherboard = X86Board(
 
 motherboard.connect_things()
 
-# Download the resources as necessary.
-thispath = os.path.dirname(os.path.realpath(__file__))
-
-kernel_url = (
-    "http://dist.gem5.org/dist/v21-0/kernels/x86/static/vmlinux-5.4.49"
-)
-kernel_path = os.path.join(thispath, "vmlinux-5.4.49")
-if not os.path.exists(kernel_path):
-    subprocess.run(["wget", "-P", thispath, kernel_url])
-
-boot_img_url = (
-    "http://dist.gem5.org/dist/v21-0/images/x86/ubuntu-18-04/boot-exit.img.gz"
-)
-boot_img_path_gz = os.path.join(thispath, "boot-exit.img.gz")
-boot_img_path = os.path.join(thispath, "boot-exit.img")
-
-if not os.path.exists(boot_img_path):
-    subprocess.run(["wget", "-P", thispath, boot_img_url])
-    with gzip.open(boot_img_path_gz, "rb") as f:
-        with open(boot_img_path, "wb") as o:
-            shutil.copyfileobj(f, o)
-
 # Set the Full System workload.
motherboard.set_workload( - kernel=kernel_path, disk_image=boot_img_path, command="m5 exit \n" + kernel=Resource("x86-linux-kernel-5.4.49"), + disk_image=Resource("x86-boot-exit"), command="m5 exit \n" ) diff --git a/configs/example/components-library/parsec_disk_run.py b/configs/example/components-library/parsec_disk_run.py index 9285d6dd78..1570d1c98d 100644 --- a/configs/example/components-library/parsec_disk_run.py +++ b/configs/example/components-library/parsec_disk_run.py @@ -52,6 +52,7 @@ sys.path.append( ) ) +from components_library.resources.resource import Resource from components_library.boards.x86_board import X86Board from components_library.cachehierarchies.classic.\ private_l1_private_l2_cache_hierarchy import ( @@ -67,10 +68,7 @@ from components_library.runtime import ( get_runtime_coherence_protocol, ) -import subprocess -import gzip import time -import shutil import time @@ -110,31 +108,6 @@ motherboard = X86Board( motherboard.connect_things() - -# Download the linux kernel and parsec disk image needed to run the -# simuluation. -thispath = os.path.dirname(os.path.realpath(__file__)) - -kernel_url = ( - "http://dist.gem5.org/dist/v21-0/kernels/x86/static/vmlinux-5.4.49" -) -kernel_path = os.path.join(thispath, "vmlinux-5.4.49") -if not os.path.exists(kernel_path): - subprocess.run(["wget", "-P", thispath, kernel_url]) - -parsec_img_url = ( - "http://dist.gem5.org/dist/v21-0/images/x86/ubuntu-18-04/parsec.img.gz" -) -parsec_img_path_gz = os.path.join(thispath, "parsec.img.gz") -parsec_img_path = os.path.join(thispath, "parsec.img") - -if not os.path.exists(parsec_img_path): - subprocess.run(["wget", "-P", thispath, parsec_img_url]) - with gzip.open(parsec_img_path_gz, "rb") as f: - with open(parsec_img_path, "wb") as o: - shutil.copyfileobj(f, o) - - # The command to run. In this case the blackscholes app with the simsmall # workload. 
command = "cd /home/gem5/parsec-benchmark\n" @@ -145,7 +118,9 @@ command += "sleep 5 \n" command += "m5 exit \n" motherboard.set_workload( - kernel=kernel_path, disk_image=parsec_img_path, command=command + kernel=Resource("x86-linux-kernel-5.4.49"), + disk_image=Resource("x86-parsec"), + command=command, ) print("Running with ISA: " + get_runtime_isa().name) diff --git a/configs/example/components-library/simple_binary_run.py b/configs/example/components-library/simple_binary_run.py index c73d31e262..212c5cdc45 100644 --- a/configs/example/components-library/simple_binary_run.py +++ b/configs/example/components-library/simple_binary_run.py @@ -48,6 +48,7 @@ sys.path.append( ) ) +from components_library.resources.resource import CustomResource from components_library.boards.simple_board import SimpleBoard from components_library.cachehierarchies.classic.no_cache import NoCache from components_library.memory.single_channel import SingleChannelDDR3_1600 @@ -76,9 +77,10 @@ motherboard.connect_things() # Set the workload thispath = os.path.dirname(os.path.realpath(__file__)) -binary = os.path.join( - thispath, "../../../tests/test-progs/hello/bin/x86/linux/hello" -) +binary = CustomResource(os.path.join( + thispath, + "../../../tests/test-progs/hello/bin/x86/linux/hello" +)) motherboard.set_workload(binary)