- This script copies all resources from a mongodb database locally The script creates a resources.json and downloads all the resources. It also updates the resources.json to point to these local downloads instead of the cloud bucket. Change-Id: I15480c4ba82bbf245425205c9c1ab7c0f3501cc3
249 lines
8.0 KiB
Python
249 lines
8.0 KiB
Python
#!/usr/bin/env python3
|
|
# Copyright (c) 2023 The Regents of the University of California
|
|
# All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met: redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer;
|
|
# redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution;
|
|
# neither the name of the copyright holders nor the names of its
|
|
# contributors may be used to endorse or promote products derived from
|
|
# this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
"""
|
|
This is a script to get all resources from the database and save them to
|
|
a json file and download all resources to a directory. The resources in
|
|
the resources.json file that is generated will be updated to point to the
|
|
local resources instead of the database resources. This script is used to
|
|
create the offline database. To use the resources.json with gem5 you need
|
|
to update the gem5 config to point to the resources.json file.
|
|
"""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
from pathlib import Path
|
|
from typing import (
|
|
Dict,
|
|
List,
|
|
TextIO,
|
|
)
|
|
from urllib import (
|
|
parse,
|
|
request,
|
|
)
|
|
|
|
|
|
def get_token(auth_url: str, api_key: str) -> str:
|
|
"""
|
|
This function gets the token from the database using the api key
|
|
:param auth_url: Authentication url to use Atlas data API
|
|
:param api_key: API key to access the database
|
|
:return: Token to access the database
|
|
"""
|
|
|
|
data = {"key": api_key}
|
|
data = json.dumps(data).encode("utf-8")
|
|
|
|
req = request.Request(
|
|
auth_url, data=data, headers={"content-type": "application/json"}
|
|
)
|
|
|
|
try:
|
|
response = request.urlopen(req)
|
|
except Exception as e:
|
|
raise Exception(
|
|
"Failed to obtain token via url request. "
|
|
f"Exception thrown:\n{e}"
|
|
)
|
|
|
|
json_response = json.loads(response.read().decode("utf-8"))
|
|
assert "access_token" in json_response, (
|
|
"'access_token' not key in"
|
|
"json dictionary. JSON: \n"
|
|
f"{json_response}"
|
|
)
|
|
token = json_response["access_token"]
|
|
|
|
return token
|
|
|
|
|
|
def get_all_resources(
|
|
url: str, data_source: str, collection: str, database: str, token: str
|
|
) -> List:
|
|
"""
|
|
This function gets all the JSON objects for resources from the database
|
|
:param url: Database url to use Atlas data API
|
|
:param data_source: Data source name for the mongoDB database
|
|
:param collection: Collection name for the mongoDB database
|
|
:param database: Database name for the mongoDB database
|
|
:param token: Token to access the database
|
|
|
|
:return: List of JSON objects for resources
|
|
"""
|
|
|
|
url = f"{url}/action/find"
|
|
data = {
|
|
"dataSource": data_source,
|
|
"collection": collection,
|
|
"database": database,
|
|
}
|
|
data = json.dumps(data).encode("utf-8")
|
|
headers = {
|
|
"Authorization": f"Bearer {token}",
|
|
"Content-Type": "application/json",
|
|
}
|
|
|
|
req = request.Request(url, data=data, headers=headers)
|
|
|
|
try:
|
|
response = request.urlopen(req)
|
|
except Exception as e:
|
|
raise Exception(
|
|
"Failed to obtain resources via url request. "
|
|
f"Exception thrown:\n{e}"
|
|
)
|
|
|
|
json_response = json.loads(response.read().decode("utf-8"))
|
|
assert "documents" in json_response, (
|
|
"'documents' not a key in "
|
|
"json dictionary. JSON: \n"
|
|
f"{json_response}"
|
|
)
|
|
resources = json_response["documents"]
|
|
|
|
return resources
|
|
|
|
|
|
def save_resources_to_file(resources: List[Dict], output: TextIO):
|
|
"""
|
|
This function saves all the JSON objects for resources to a file.
|
|
:param resources: List of JSON objects for resources
|
|
:param output: Output directory absolute path
|
|
"""
|
|
|
|
path = output.joinpath("resources.json")
|
|
|
|
with open(path, "w") as f:
|
|
json.dump(resources, f, indent=4)
|
|
|
|
|
|
def get_resource_local_filepath(
|
|
resource_id: str, resource_version: str, base_output_path: TextIO
|
|
) -> Path:
|
|
"""
|
|
This function returns the local filepath for a resource
|
|
:param resource_url: URL field of the resource
|
|
:param base_output_path: Base output directory absolute path
|
|
:return: Local filepath for the resource
|
|
"""
|
|
filename = f"{resource_id}-{resource_version}"
|
|
|
|
filepath = Path(base_output_path).joinpath(filename)
|
|
|
|
return filepath
|
|
|
|
|
|
def download_resources(resources: List[Dict], output: TextIO):
|
|
"""
|
|
This function downloads the resources which have a url field and
|
|
updates the url field to point to the local download of the resource.
|
|
:param output: Output directory absolute path
|
|
"""
|
|
|
|
path = output.joinpath("offline_resources")
|
|
if not path.exists():
|
|
path.mkdir()
|
|
|
|
for resource in resources:
|
|
if "url" in resource.keys():
|
|
url = resource["url"]
|
|
filepath = get_resource_local_filepath(
|
|
resource["id"], resource["resource_version"], path
|
|
)
|
|
|
|
if not filepath.exists() or ( # If the file does not exist
|
|
"url-md5sum"
|
|
in resource.keys() # If it exist but the md5sum is wrong.
|
|
and hashlib.md5(filepath.read_bytes()).hexdigest()
|
|
!= resource["url-md5sum"]
|
|
):
|
|
print(f"Downloading {url} to {filepath}")
|
|
request.urlretrieve(url, filepath)
|
|
resource["url"] = filepath.absolute().as_uri()
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = argparse.ArgumentParser(
|
|
description="Download all the remote gem5 Resources data and files "
|
|
"to be cached and used locally."
|
|
)
|
|
|
|
parser.add_argument(
|
|
"--config-file-path",
|
|
type=str,
|
|
default=Path(__file__)
|
|
.resolve()
|
|
.parent.joinpath("gem5_default_config.json"),
|
|
help="Filepath to the gem5 config file",
|
|
)
|
|
|
|
parser.add_argument(
|
|
"--output-dir",
|
|
type=str,
|
|
default=Path.cwd(),
|
|
help="Output directory path, default is the cwd."
|
|
"The resources.json and all resources will be saved in this directory",
|
|
)
|
|
args = parser.parse_args()
|
|
|
|
output_path = Path(args.output_dir)
|
|
|
|
# Get the gem5 config file from the file
|
|
|
|
with open(args.config_file_path) as f:
|
|
gem5_config_json = json.load(f)
|
|
gem5_config_gem5_resources = gem5_config_json["sources"]["gem5-resources"]
|
|
|
|
# Parse the gem5 config
|
|
db_url = gem5_config_gem5_resources["url"]
|
|
data_source = gem5_config_gem5_resources["dataSource"]
|
|
collection_name = gem5_config_gem5_resources["collection"]
|
|
db_name = gem5_config_gem5_resources["database"]
|
|
auth_url = gem5_config_gem5_resources["authUrl"]
|
|
api_key = gem5_config_gem5_resources["apiKey"]
|
|
|
|
if not output_path.exists():
|
|
output_path = output_path.mkdir(parents=True, exist_ok=True)
|
|
|
|
token = get_token(auth_url, api_key)
|
|
|
|
resources = get_all_resources(
|
|
db_url,
|
|
data_source,
|
|
collection_name,
|
|
db_name,
|
|
token,
|
|
)
|
|
|
|
download_resources(resources, output_path)
|
|
|
|
save_resources_to_file(
|
|
resources,
|
|
output_path,
|
|
)
|