python,configs: Add Resource class to gem5 components

The `Resource` class can be used to obtain a gem5 resource. The
`Resource` class, via the `downloader` package, parses the gem5
resources `resources.json` file:
https://gem5.googlesource.com/public/gem5-resources/+/refs/heads/develop/resources.json
From this it can determine the available resources and where to download
them. This allows for automatic retrieval of resources.

The `CustomResource` can be used to specify a local resource not part of
gem5 resources.

The board's `set_workload` function has been updated to use the
resources.

The components library example scripts have been updated to demonstrate
the `Resource`/`CustomResource` class usage.

Issue-on: https://gem5.atlassian.net/browse/GEM5-1022
Change-Id: I59cfe81d5ec9c64576c0dab55af52aede96976fb
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/49304
Reviewed-by: Austin Harris <austin.dane.harris@gmail.com>
Maintainer: Jason Lowe-Power <power.jg@gmail.com>
Tested-by: kokoro <noreply+kokoro@google.com>
This commit is contained in:
Bobby R. Bruce
2021-08-16 15:29:49 -07:00
parent ec91492feb
commit 2ef2f11955
8 changed files with 502 additions and 68 deletions

View File

@@ -24,6 +24,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from components_library.resources.resource import AbstractResource
from m5.objects import (
AddrRange,
SrcClockDomain,
@@ -145,7 +146,7 @@ class SimpleBoard(AbstractBoard):
self.mem_ranges = [AddrRange(memory.get_size())]
memory.set_memory_range(self.mem_ranges)
def set_workload(self, binary: str) -> None:
def set_workload(self, binary: AbstractResource) -> None:
"""Set up the system to run a specific binary.
**Limitations**
@@ -153,11 +154,11 @@ class SimpleBoard(AbstractBoard):
* Dynamically linked executables are partially supported when the host
ISA and the simulated ISA are the same.
:param binary: The path on the *host* to the binary to run in gem5.
:param binary: The resource encapsulating the binary to be run.
"""
self.workload = SEWorkload.init_compatible(binary)
self.workload = SEWorkload.init_compatible(binary.get_local_path())
process = Process()
process.cmd = [binary]
process.cmd = [binary.get_local_path()]
self.get_processor().get_cores()[0].set_workload(process)

View File

@@ -25,6 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from components_library.resources.resource import AbstractResource
from components_library.utils.override import overrides
from components_library.boards.abstract_board import AbstractBoard
from components_library.isas import ISA
@@ -273,7 +274,10 @@ class X86Board(SimpleBoard):
def set_workload(
self, kernel: str, disk_image: str, command: Optional[str] = None
self,
kernel: AbstractResource,
disk_image: AbstractResource,
command: Optional[str] = None,
):
"""Setup the full system files
@@ -287,14 +291,14 @@ class X86Board(SimpleBoard):
* Only supports a Linux kernel
* Disk must be configured correctly to use the command option
:param kernel: The compiled kernel binary
:param disk_image: A disk image containing the OS data. The first
partition should be the root partition.
:param kernel: The compiled kernel binary resource
:param disk_image: A disk image resource containing the OS data. The
first partition should be the root partition.
:param command: The command(s) to run with bash once the OS is booted
"""
# Set the Linux kernel to use.
self.workload.object_file = kernel
self.workload.object_file = kernel.get_local_path()
# Options specified on the kernel command line.
self.workload.command_line = " ".join(
@@ -312,7 +316,7 @@ class X86Board(SimpleBoard):
ide_disk.image = CowDiskImage(
child=RawDiskImage(read_only=True), read_only=False
)
ide_disk.image.child.image_file = disk_image
ide_disk.image.child.image_file = disk_image.get_local_path()
# Attach the SimObject to the system.
self.pc.south_bridge.ide.disks = [ide_disk]

View File

@@ -0,0 +1,255 @@
# Copyright (c) 2021 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import json
import urllib.request
import hashlib
import os
import shutil
import gzip
import hashlib
import base64
from typing import List, Dict, Optional
from ..utils.filelock import FileLock
"""
This Python module contains functions used to download, list, and obtain
information about resources from resources.gem5.org.
"""
def _get_resources_json_uri() -> str:
# TODO: This is hardcoded to develop. This will need updated for each
# release to the stable branch.
uri = (
"https://gem5.googlesource.com/public/gem5-resources/"
+ "+/refs/heads/develop/resources.json?format=TEXT"
)
return uri
def _get_resources_json() -> Dict:
    """
    Fetches and parses the Resources JSON.

    :returns: The Resources JSON (as a Python Dictionary).
    """
    # Note: Google Source does not properly support obtaining files as raw
    # text. What comes back from the URL is the JSON encoded in base64, so it
    # has to be decoded before it can be parsed.
    with urllib.request.urlopen(_get_resources_json_uri()) as response:
        encoded_payload = response.read()

    json_text = base64.b64decode(encoded_payload).decode("utf-8")
    return json.loads(json_text)
def _get_resources(resources_group: Dict) -> Dict[str, Dict]:
"""
A recursive function to get all the resources.
:returns: A dictionary of resource names to the resource JSON objects.
"""
to_return = {}
for resource in resources_group:
if resource["type"] == "artifact":
# If the type is "artifact" then we add it directly to the map
# after a check that the name is unique.
if resource["name"] in to_return.keys():
raise Exception(
"Error: Duplicate artifact with name '{}'.".format(
resource["name"]
)
)
to_return[resource["name"]] = resource
elif resource["type"] == "group":
# If it's a group we get recursive. We then check to see if there
# are any duplication of keys.
new_map = _get_resources(resource["contents"])
intersection = set(new_map.keys()).intersection(to_return.keys())
if len(intersection) > 0:
# Note: if this error is received it's likely an error with
# the resources.json file. The resources names need to be
# unique keyes.
raise Exception(
"Error: Duplicate artifacts with names: {}.".format(
str(intersection)
)
)
to_return.update(new_map)
else:
raise Exception(
"Error: Unknown type '{}'.".format(resource["type"])
)
return to_return
def _get_md5(file: str) -> str:
"""
Gets the md5 of a file.
:param file: The file needing an md5 value.
:returns: The md5 of the input file.
"""
# Note: This code is slightly more complex than you might expect as
# `hashlib.md5(<file>)` returns malloc errors for large files (such as
# disk images).
md5_object = hashlib.md5()
block_size = 128 * md5_object.block_size
a_file = open(file, "rb")
chunk = a_file.read(block_size)
while chunk:
md5_object.update(chunk)
chunk = a_file.read(block_size)
return md5_object.hexdigest()
def _download(url: str, download_to: str) -> None:
"""
Downloads a file.
:param url: The URL of the file to download.
:param download_to: The location the downloaded file is to be stored.
"""
# TODO: This whole setup will only work for single files we can get via
# wget. We also need to support git clones going forward.
urllib.request.urlretrieve(url, download_to)
def list_resources() -> List[str]:
    """
    Lists all available resources by name.

    :returns: A list of resources by name.
    """
    # Materialize the keys so the return value really is a list, as the
    # annotation promises, rather than a `dict_keys` view.
    return list(_get_resources(_get_resources_json()["resources"]))
def get_resources_json_obj(resource_name: str) -> Dict:
    """
    Looks up the JSON object describing a single resource.

    :param resource_name: The name of the resource.

    :returns: The JSON object (in the form of a dictionary).

    :raises Exception: An exception is raised if the specified resource does
    not exist.
    """
    # The resources JSON is fetched and flattened on every call.
    known_resources = _get_resources(_get_resources_json()["resources"])

    if resource_name in known_resources:
        return known_resources[resource_name]

    raise Exception(
        "Error: Resource with name '{}' does not exist".format(resource_name)
    )
def get_resource(
    resource_name: str,
    to_path: str,
    unzip: Optional[bool] = True,
    override: Optional[bool] = False,
) -> None:
    """
    Obtains a gem5 resource and stores it to a specified location. If the
    specified resource is already at the location, no action is taken.

    :param resource_name: The resource to be obtained.

    :param to_path: The location in the file system the resource is to be
    stored. The filename should be included.

    :param unzip: If true, gzipped resources will be unzipped prior to saving
    to `to_path`. True by default.

    :param override: If a resource is present with an incorrect hash (e.g.,
    an outdated version of the resource is present), `get_resource` will delete
    this local resource and re-download it if this parameter is True. False by
    default.

    :raises Exception: An exception is thrown if a file is already present at
    `to_path` but it does not have the correct md5 sum (and `override` is
    False). An exception will also be thrown if a directory is present at
    `to_path`.
    """

    # We apply a lock for a specific resource. This is to avoid circumstances
    # where multiple instances of gem5 are running and trying to obtain the
    # same resources at once. The timeout here is somewhat arbitrarily put at
    # 15 minutes. Most resources should be downloaded and decompressed in this
    # timeframe, even on the most constrained of systems.
    with FileLock("{}.lock".format(to_path), timeout=900):

        resource_json = get_resources_json_obj(resource_name)

        if os.path.exists(to_path):

            if not os.path.isfile(to_path):
                raise Exception(
                    "There is a directory at '{}'.".format(to_path)
                )

            if _get_md5(to_path) == resource_json["md5sum"]:
                # In this case, the file has already been downloaded, no need
                # to do so again.
                return
            elif override:
                # Stale or corrupt copy: discard it and fall through to a
                # fresh download.
                os.remove(to_path)
            else:
                raise Exception(
                    "There is already a file present at '{}' but "
                    "its md5 value is invalid.".format(to_path)
                )

        # When the resource is to be decompressed, the archive is first
        # downloaded alongside the final destination with a ".gz" suffix.
        download_dest = to_path
        run_unzip = unzip and resource_json["is_zipped"].lower() == "true"
        if run_unzip:
            download_dest += ".gz"

        # TODO: Might be nice to have some kind of download status bar here.
        # TODO: There might be a case where this should be silenced.
        print("'{}' not found locally. Downloading...".format(resource_name))
        _download(url=resource_json["url"], download_to=download_dest)
        print("Finished downloading '{}'.".format(resource_name))

        if run_unzip:
            print("Decompressing '{}'...".format(resource_name))
            with gzip.open(download_dest, "rb") as f:
                with open(to_path, "wb") as o:
                    shutil.copyfileobj(f, o)
            # The compressed archive is no longer needed once extracted.
            os.remove(download_dest)
            print("Finished decompressing '{}'.".format(resource_name))

View File

@@ -0,0 +1,109 @@
# Copyright (c) 2021 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from abc import ABCMeta
import os
from .downloader import get_resource
from typing import Optional
"""
A Resource object encapsulates a gem5 resource. Resources are items needed to
run a simulation, such as a disk image, kernel, or binary. The gem5 project
provides pre-built resources, with sources, at <resources.gem5.org>.
The purpose of this encapsulation is twofold:
1. It allows automatic retrieval of gem5 resources. E.g., specifying a resource
which is not local will initiate a download.
2. It provides a location where code may be added to record the resources used
within a simulation. At present this is a TODO work-item.
"""
class AbstractResource(metaclass=ABCMeta):
    """
    The base class for all resources. A resource is, at minimum, something
    with a path on the host system, which is what this class stores and
    exposes.
    """

    # Note: the Python 3 `metaclass=` keyword is used here. A `__metaclass__`
    # class attribute is ignored by Python 3.

    def __init__(self, local_path: str):
        """
        :param local_path: The path of the resource on the host system.
        """
        self._local_path = local_path

    def get_local_path(self) -> str:
        """
        :returns: The path of the resource on the host system, as given at
        construction.
        """
        return self._local_path
class CustomResource(AbstractResource):
    """
    A resource supplied by the gem5 user. Use this to wrap a file that lives
    on the host system rather than one that comes from the gem5 resources
    repository.
    """

    def __init__(self, local_path: str):
        """
        :param local_path: The path of the resource on the host system.
        """
        # Nothing to fetch or verify: the path is simply recorded.
        super(CustomResource, self).__init__(local_path=local_path)
class Resource(AbstractResource):
    """
    An official gem5 resource as hosted within our gem5 resources repository
    (<resources.gem5.org>).

    A user need only specify the name of the resource during construction. The
    resource will be downloaded if needed. A list of available resources can
    be obtained via `downloader.list_resources()`.
    """

    def __init__(
        self,
        resource_name: str,
        resource_directory: Optional[str] = None,
        override: Optional[bool] = False,
    ):
        """
        :param resource_name: The name of the gem5 resource.
        :param resource_directory: The location of the directory in which the
        resource is to be stored. It is created if it does not exist. If
        None, the resource name itself is used as the path.
        :param override: If the resource is present, but does not have the
        correct md5 value, the resource will be deleted and re-downloaded if
        this value is True. Otherwise an exception will be thrown. False by
        default.
        """

        if resource_directory is not None:
            # `exist_ok=True` avoids a check-then-create race when several
            # gem5 instances create the same directory at the same time.
            os.makedirs(resource_directory, exist_ok=True)
            to_path = os.path.join(resource_directory, resource_name)
        else:
            to_path = resource_name

        super(Resource, self).__init__(local_path=to_path)

        # Download the resource to `to_path` unless a valid copy is already
        # there.
        get_resource(
            resource_name=resource_name, to_path=to_path, override=override
        )

View File

@@ -0,0 +1,111 @@
# Copyright (c) 2009, Evan Fosmark
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the <organization> nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import time
import errno
class FileLockException(Exception):
    """Raised when a lock file cannot be acquired."""
class FileLock(object):
    """A file locking mechanism that has context-manager support so
    you can use it in a with statement. This should be relatively cross
    compatible as it doesn't rely on msvcrt or fcntl for the locking.

    The lock is represented by a lock file which is created exclusively
    (`O_CREAT | O_EXCL`) on acquisition and deleted on release.
    """

    # Class-level default so `release()`, and therefore `__del__`, is safe on
    # an instance whose `__init__` raised before the attribute was assigned.
    is_locked = False

    def __init__(self, file_name, timeout=10, delay=0.05):
        """Prepare the file locker. Specify the file to lock and optionally
        the maximum timeout and the delay between each attempt to lock.

        :param file_name: The name of the file to lock. The lock file is
        `<file_name>.lock`, resolved against the current working directory.
        :param timeout: The maximum number of seconds to wait for the lock.
        If None, `acquire` fails immediately when the lock is already held.
        :param delay: The number of seconds to sleep between attempts.

        :raises ValueError: If `timeout` is given but `delay` is None.
        """
        if timeout is not None and delay is None:
            raise ValueError(
                "If timeout is not None, then delay must not be None."
            )
        self.is_locked = False
        self.lockfile = os.path.join(os.getcwd(), "%s.lock" % file_name)
        self.file_name = file_name
        self.timeout = timeout
        self.delay = delay

    def acquire(self):
        """Acquire the lock, if possible. If the lock is in use, it checks
        again every `delay` seconds. It does this until it either gets the
        lock or exceeds `timeout` number of seconds, in which case it throws
        an exception.

        :raises FileLockException: If the lock is held elsewhere and either
        `timeout` is None or `timeout` seconds have elapsed.
        :raises OSError: If creating the lock file fails for any reason other
        than the file already existing.
        """
        start_time = time.time()
        while True:
            try:
                # O_EXCL makes creation fail if the lock file already exists.
                self.fd = os.open(
                    self.lockfile, os.O_CREAT | os.O_EXCL | os.O_RDWR
                )
                # Only flag the lock as held once the file has been created.
                self.is_locked = True
                break
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
                if self.timeout is None:
                    raise FileLockException(
                        "Could not acquire lock on {}".format(self.file_name)
                    )
                if (time.time() - start_time) >= self.timeout:
                    raise FileLockException("Timeout occured.")
                time.sleep(self.delay)

    def release(self):
        """Get rid of the lock by deleting the lockfile.
        When working in a `with` statement, this gets automatically
        called at the end. Does nothing if the lock is not held.
        """
        if self.is_locked:
            os.close(self.fd)
            os.unlink(self.lockfile)
            self.is_locked = False

    def __enter__(self):
        """Activated when used in the with statement.
        Should automatically acquire a lock to be used in the with block.
        """
        if not self.is_locked:
            self.acquire()
        return self

    def __exit__(self, type, value, traceback):
        """Activated at the end of the with statement.
        It automatically releases the lock if it is held.
        """
        if self.is_locked:
            self.release()

    def __del__(self):
        """Make sure that the FileLock instance doesn't leave a lockfile
        lying around.
        """
        self.release()

View File

@@ -60,11 +60,9 @@ from components_library.processors.simple_processor import SimpleProcessor
from components_library.processors.cpu_types import CPUTypes
from components_library.isas import ISA
from components_library.coherence_protocol import CoherenceProtocol
from components_library.resources.resource import Resource
import os
import subprocess
import gzip
import shutil
# Run a check to ensure the right version of gem5 is being used.
if (
@@ -110,31 +108,10 @@ motherboard = X86Board(
motherboard.connect_things()
# Download the resources as necessary.
thispath = os.path.dirname(os.path.realpath(__file__))
kernel_url = (
"http://dist.gem5.org/dist/v21-0/kernels/x86/static/vmlinux-5.4.49"
)
kernel_path = os.path.join(thispath, "vmlinux-5.4.49")
if not os.path.exists(kernel_path):
subprocess.run(["wget", "-P", thispath, kernel_url])
boot_img_url = (
"http://dist.gem5.org/dist/v21-0/images/x86/ubuntu-18-04/boot-exit.img.gz"
)
boot_img_path_gz = os.path.join(thispath, "boot-exit.img.gz")
boot_img_path = os.path.join(thispath, "boot-exit.img")
if not os.path.exists(boot_img_path):
subprocess.run(["wget", "-P", thispath, boot_img_url])
with gzip.open(boot_img_path_gz, "rb") as f:
with open(boot_img_path, "wb") as o:
shutil.copyfileobj(f, o)
# Set the Full System workload.
motherboard.set_workload(
kernel=kernel_path, disk_image=boot_img_path, command="m5 exit \n"
kernel=Resource("x86-linux-kernel-5.4.49"),
disk_image=Resource("x86-boot-exit"), command="m5 exit \n"
)

View File

@@ -52,6 +52,7 @@ sys.path.append(
)
)
from components_library.resources.resource import Resource
from components_library.boards.x86_board import X86Board
from components_library.cachehierarchies.classic.\
private_l1_private_l2_cache_hierarchy import (
@@ -67,10 +68,7 @@ from components_library.runtime import (
get_runtime_coherence_protocol,
)
import subprocess
import gzip
import time
import shutil
import time
@@ -110,31 +108,6 @@ motherboard = X86Board(
motherboard.connect_things()
# Download the linux kernel and parsec disk image needed to run the
# simuluation.
thispath = os.path.dirname(os.path.realpath(__file__))
kernel_url = (
"http://dist.gem5.org/dist/v21-0/kernels/x86/static/vmlinux-5.4.49"
)
kernel_path = os.path.join(thispath, "vmlinux-5.4.49")
if not os.path.exists(kernel_path):
subprocess.run(["wget", "-P", thispath, kernel_url])
parsec_img_url = (
"http://dist.gem5.org/dist/v21-0/images/x86/ubuntu-18-04/parsec.img.gz"
)
parsec_img_path_gz = os.path.join(thispath, "parsec.img.gz")
parsec_img_path = os.path.join(thispath, "parsec.img")
if not os.path.exists(parsec_img_path):
subprocess.run(["wget", "-P", thispath, parsec_img_url])
with gzip.open(parsec_img_path_gz, "rb") as f:
with open(parsec_img_path, "wb") as o:
shutil.copyfileobj(f, o)
# The command to run. In this case the blackscholes app with the simsmall
# workload.
command = "cd /home/gem5/parsec-benchmark\n"
@@ -145,7 +118,9 @@ command += "sleep 5 \n"
command += "m5 exit \n"
motherboard.set_workload(
kernel=kernel_path, disk_image=parsec_img_path, command=command
kernel=Resource("x86-linux-kernel-5.4.49"),
disk_image=Resource("x86-parsec"),
command=command,
)
print("Running with ISA: " + get_runtime_isa().name)

View File

@@ -48,6 +48,7 @@ sys.path.append(
)
)
from components_library.resources.resource import CustomResource
from components_library.boards.simple_board import SimpleBoard
from components_library.cachehierarchies.classic.no_cache import NoCache
from components_library.memory.single_channel import SingleChannelDDR3_1600
@@ -76,9 +77,10 @@ motherboard.connect_things()
# Set the workload
thispath = os.path.dirname(os.path.realpath(__file__))
binary = os.path.join(
thispath, "../../../tests/test-progs/hello/bin/x86/linux/hello"
)
binary = CustomResource(os.path.join(
thispath,
"../../../tests/test-progs/hello/bin/x86/linux/hello"
))
motherboard.set_workload(binary)