Files
gem5/util/offline_db/get-resources-from-db.py
Harshil Patel b42d9fabf7 util: Added script to copy resources from mongodb (#510)
- This script copies all resources from a MongoDB database locally. The
script creates a resources.json and downloads all the resources. It also
updates the resources.json to point to these local downloads instead of
the cloud bucket.

Change-Id: I15480c4ba82bbf245425205c9c1ab7c0f3501cc3
2023-12-18 12:41:52 -08:00

249 lines
8.0 KiB
Python

#!/usr/bin/env python3
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This is a script to get all resources from the database and save them to
a json file and download all resources to a directory. The resources in
the resources.json file that is generated will be updated to point to the
local resources instead of the database resources. This script is used to
create the offline database. To use the resources.json with gem5 you need
to update the gem5 config to point to the resources.json file.
"""
import argparse
import hashlib
import json
from pathlib import Path
from typing import (
Dict,
List,
TextIO,
)
from urllib import (
parse,
request,
)
def get_token(auth_url: str, api_key: str) -> str:
    """
    This function gets the token from the database using the api key.

    :param auth_url: Authentication url to use Atlas data API
    :param api_key: API key to access the database
    :return: Token to access the database
    :raises Exception: If the url request itself fails.
    :raises AssertionError: If the JSON response has no 'access_token' key.
    """
    payload = json.dumps({"key": api_key}).encode("utf-8")
    req = request.Request(
        auth_url, data=payload, headers={"content-type": "application/json"}
    )
    try:
        response = request.urlopen(req)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(
            "Failed to obtain token via url request. "
            f"Exception thrown:\n{e}"
        ) from e

    # Close the response as soon as the body has been consumed.
    with response:
        json_response = json.loads(response.read().decode("utf-8"))

    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if "access_token" not in json_response:
        raise AssertionError(
            "'access_token' not a key in json dictionary. JSON: \n"
            f"{json_response}"
        )
    return json_response["access_token"]
def get_all_resources(
    url: str, data_source: str, collection: str, database: str, token: str
) -> List:
    """
    This function gets all the JSON objects for resources from the database.

    :param url: Database url to use Atlas data API
    :param data_source: Data source name for the mongoDB database
    :param collection: Collection name for the mongoDB database
    :param database: Database name for the mongoDB database
    :param token: Token to access the database
    :return: List of JSON objects for resources
    :raises Exception: If the url request itself fails.
    :raises AssertionError: If the JSON response has no 'documents' key.
    """
    find_url = f"{url}/action/find"
    # NOTE(review): no "filter" or "limit" is sent, so the number of
    # documents returned is whatever the endpoint's defaults allow --
    # confirm this really yields every resource.
    payload = json.dumps(
        {
            "dataSource": data_source,
            "collection": collection,
            "database": database,
        }
    ).encode("utf-8")
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    req = request.Request(find_url, data=payload, headers=headers)
    try:
        response = request.urlopen(req)
    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(
            "Failed to obtain resources via url request. "
            f"Exception thrown:\n{e}"
        ) from e

    # Close the response as soon as the body has been consumed.
    with response:
        json_response = json.loads(response.read().decode("utf-8"))

    # Raised explicitly (rather than via `assert`) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if "documents" not in json_response:
        raise AssertionError(
            "'documents' not a key in "
            "json dictionary. JSON: \n"
            f"{json_response}"
        )
    return json_response["documents"]
def save_resources_to_file(resources: List[Dict], output: Path):
    """
    This function saves all the JSON objects for resources to a file named
    "resources.json" inside the output directory.

    :param resources: List of JSON objects for resources
    :param output: Output directory path; a plain string is accepted as
                   well as a Path. The directory must already exist.
    """
    # Coerce to Path so a string directory works too, the same way
    # get_resource_local_filepath treats its base path.
    path = Path(output).joinpath("resources.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(resources, f, indent=4)
def get_resource_local_filepath(
    resource_id: str, resource_version: str, base_output_path: Path
) -> Path:
    """
    This function returns the local filepath for a resource. The file is
    named "<resource_id>-<resource_version>" and placed directly inside the
    base output directory.

    :param resource_id: ID field of the resource
    :param resource_version: Version field of the resource
    :param base_output_path: Base output directory path (a string is
                             accepted as well as a Path)
    :return: Local filepath for the resource
    """
    filename = f"{resource_id}-{resource_version}"
    return Path(base_output_path).joinpath(filename)
def _md5_of_file(filepath: Path, chunk_size: int = 1 << 20) -> str:
    """Return the hex md5 digest of a file, reading it in fixed-size chunks
    so the whole file never has to be held in memory at once."""
    digest = hashlib.md5()
    with open(filepath, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def download_resources(resources: List[Dict], output: Path):
    """
    This function downloads the resources which have a url field and
    updates the url field to point to the local download of the resource.
    Files are stored in an "offline_resources" directory created inside the
    output directory. The resource dictionaries are modified in place.

    :param resources: List of JSON objects for resources
    :param output: Output directory path (a string is accepted as well as
                   a Path)
    """
    path = Path(output).joinpath("offline_resources")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    path.mkdir(parents=True, exist_ok=True)

    for resource in resources:
        if "url" not in resource:
            continue
        url = resource["url"]
        filepath = get_resource_local_filepath(
            resource["id"], resource["resource_version"], path
        )
        # Download when the file is missing, or when it is present but its
        # md5sum does not match the one recorded for the resource.
        needs_download = not filepath.exists() or (
            "url-md5sum" in resource
            and _md5_of_file(filepath) != resource["url-md5sum"]
        )
        if needs_download:
            print(f"Downloading {url} to {filepath}")
            request.urlretrieve(url, filepath)
        resource["url"] = filepath.absolute().as_uri()
if __name__ == "__main__":
    # Script entry point: read the gem5 config, fetch every resource
    # document from the database, download the files the documents point
    # to, and write a resources.json that references the local copies.
    parser = argparse.ArgumentParser(
        description="Download all the remote gem5 Resources data and files "
        "to be cached and used locally."
    )
    parser.add_argument(
        "--config-file-path",
        type=str,
        default=Path(__file__)
        .resolve()
        .parent.joinpath("gem5_default_config.json"),
        help="Filepath to the gem5 config file",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default=Path.cwd(),
        help="Output directory path, default is the cwd. "
        "The resources.json and all resources will be saved in this directory",
    )
    args = parser.parse_args()
    output_path = Path(args.output_dir)

    # Load the gem5 config file given on the command line.
    with open(args.config_file_path) as f:
        gem5_config_json = json.load(f)
    gem5_config_gem5_resources = gem5_config_json["sources"]["gem5-resources"]

    # Pull the database connection settings out of the gem5 config.
    db_url = gem5_config_gem5_resources["url"]
    data_source = gem5_config_gem5_resources["dataSource"]
    collection_name = gem5_config_gem5_resources["collection"]
    db_name = gem5_config_gem5_resources["database"]
    auth_url = gem5_config_gem5_resources["authUrl"]
    api_key = gem5_config_gem5_resources["apiKey"]

    # Path.mkdir() returns None, so its result must not be assigned back to
    # output_path; doing so left output_path as None whenever the directory
    # had to be created.
    output_path.mkdir(parents=True, exist_ok=True)

    token = get_token(auth_url, api_key)
    resources = get_all_resources(
        db_url,
        data_source,
        collection_name,
        db_name,
        token,
    )
    # download_resources rewrites each resource's url in place, so it must
    # run before the resources are written to resources.json.
    download_resources(resources, output_path)
    save_resources_to_file(
        resources,
        output_path,
    )