util: Added script to copy resources from mongodb (#510)
- This script copies all resources from a MongoDB database locally. The script creates a resources.json and downloads all the resources. It also updates the resources.json to point to these local downloads instead of the cloud bucket. Change-Id: I15480c4ba82bbf245425205c9c1ab7c0f3501cc3
This commit is contained in:
committed by
Bobby R. Bruce
parent
27d89379d2
commit
d76a01973a
45
util/offline_db/README.md
Normal file
45
util/offline_db/README.md
Normal file
@@ -0,0 +1,45 @@
|
||||
|
||||
# Offline Database Utility
|
||||
|
||||
This Python script makes local copies of all the gem5 Resources' external files and data.
|
||||
This is useful for cases where an internet connection is not permitted.
|
||||
With this script users can cache a local copy which can be used by gem5.
|
||||
|
||||
Two interconnected parts of the gem5 infrastructure are cached using this script.
|
||||
They are:
|
||||
|
||||
1. **The gem5 Resources database**: When the gem5 Standard Library attempts to construct a gem5 Resource it will reference that resource's entry in the gem5 MongoDB database.
|
||||
It then uses this data to construct the resource.
|
||||
With this script we download this database as a JSON file for gem5 to use in place of the database.
|
||||
|
||||
2. **gem5 Resource Remote Files**: Resources can (and often do) require files to be downloaded for them to function correctly in gem5.
|
||||
Once the gem5 Resource is constructed from the database entry the gem5 Resource may download the remote files it requires.
|
||||
As a simple example, a `DiskImageResource` will contain disk image partitioning information necessary for a gem5 simulation to be setup correctly.
|
||||
The disk image file is a remote file which is downloaded as this information cannot be stored directly in the database.
|
||||
|
||||
The location of these files is stored within the resource's database entry as a URL.
|
||||
|
||||
To create a local cache of these remote files we download all the remote files and, when creating the JSON file from the database, replace each remote file's URL with a file URI pointing to its local copy.
|
||||
|
||||
Once this script is finished running the user will have a directory of all the remote gem5 Resources files and a JSON file representation of the database, with the remote files' URLs updated to local path URIs.
|
||||
|
||||
## Running the Script
|
||||
|
||||
### Arguments
|
||||
- `--config-file-path`: Filepath to the gem5 config file.
|
||||
- `--output-dir`: Output directory path (default: current working directory).
|
||||
|
||||
|
||||
```bash
|
||||
python3 ./get-resources-from-db.py [--config-file-path CONFIG_FILE_PATH] [--output-dir OUTPUT_DIR]
|
||||
```
|
||||
|
||||
## Functionality
|
||||
|
||||
### What is the config file?
|
||||
|
||||
The resources config file represents all the sources.
|
||||
|
||||
The gem5_default_config adds the gem5 resources MongoDB Atlas database as a datasource.
|
||||
|
||||
Documentation on setting up your own data sources can be found here: https://www.gem5.org/documentation/gem5-stdlib/using-local-resources
|
||||
13
util/offline_db/gem5_default_config.json
Normal file
13
util/offline_db/gem5_default_config.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"sources": {
|
||||
"gem5-resources": {
|
||||
"dataSource": "gem5-vision",
|
||||
"database": "gem5-vision",
|
||||
"collection": "resources",
|
||||
"url": "https://data.mongodb-api.com/app/data-ejhjf/endpoint/data/v1",
|
||||
"authUrl": "https://realm.mongodb.com/api/client/v2.0/app/data-ejhjf/auth/providers/api-key/login",
|
||||
"apiKey": "OIi5bAP7xxIGK782t8ZoiD2BkBGEzMdX3upChf9zdCxHSnMoiTnjI22Yw5kOSgy9",
|
||||
"isMongo": true
|
||||
}
|
||||
}
|
||||
}
|
||||
248
util/offline_db/get-resources-from-db.py
Normal file
248
util/offline_db/get-resources-from-db.py
Normal file
@@ -0,0 +1,248 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright (c) 2023 The Regents of the University of California
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""
|
||||
This is a script to get all resources from the database and save them to
|
||||
a json file and download all resources to a directory. The resources in
|
||||
the resources.json file that is generated will be updated to point to the
|
||||
local resources instead of the database resources. This script is used to
|
||||
create the offline database. To use the resources.json with gem5 you need
|
||||
to update the gem5 config to point to the resources.json file.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import hashlib
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import (
|
||||
Dict,
|
||||
List,
|
||||
TextIO,
|
||||
)
|
||||
from urllib import (
|
||||
parse,
|
||||
request,
|
||||
)
|
||||
|
||||
|
||||
def get_token(auth_url: str, api_key: str) -> str:
    """
    This function gets the token from the database using the api key.

    The API key is sent to ``auth_url`` as the JSON body ``{"key": api_key}``
    and the ``access_token`` field of the JSON response is returned.

    :param auth_url: Authentication url to use Atlas data API
    :param api_key: API key to access the database
    :return: Token to access the database
    :raises Exception: If the url request fails, or if the JSON response
                       does not contain an ``access_token`` key.
    """

    payload = json.dumps({"key": api_key}).encode("utf-8")

    req = request.Request(
        auth_url, data=payload, headers={"content-type": "application/json"}
    )

    try:
        # The context manager closes the response once the body is read.
        with request.urlopen(req) as response:
            raw_response = response.read()
    except Exception as e:
        # Chain the original error so its traceback is not lost.
        raise Exception(
            "Failed to obtain token via url request. "
            f"Exception thrown:\n{e}"
        ) from e

    json_response = json.loads(raw_response.decode("utf-8"))

    # Checked explicitly rather than with `assert`, which is stripped when
    # Python runs with `-O`.
    if "access_token" not in json_response:
        raise Exception(
            "'access_token' not a key in "
            "json dictionary. JSON: \n"
            f"{json_response}"
        )

    return json_response["access_token"]
|
||||
|
||||
|
||||
def get_all_resources(
    url: str, data_source: str, collection: str, database: str, token: str
) -> List[Dict]:
    """
    This function gets all the JSON objects for resources from the database.

    A request is sent to ``{url}/action/find`` with the data source,
    collection and database names as a JSON body, authorized with ``token``
    as a bearer token. The ``documents`` field of the JSON response is
    returned.

    :param url: Database url to use Atlas data API
    :param data_source: Data source name for the mongoDB database
    :param collection: Collection name for the mongoDB database
    :param database: Database name for the mongoDB database
    :param token: Token to access the database

    :return: List of JSON objects for resources
    :raises Exception: If the url request fails, or if the JSON response
                       does not contain a ``documents`` key.
    """

    find_url = f"{url}/action/find"
    # NOTE(review): no filter, limit or pagination is sent, so the result is
    # whatever a single `find` request returns -- confirm the endpoint does
    # not cap the number of returned documents below the collection size.
    payload = json.dumps(
        {
            "dataSource": data_source,
            "collection": collection,
            "database": database,
        }
    ).encode("utf-8")
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }

    req = request.Request(find_url, data=payload, headers=headers)

    try:
        # The context manager closes the response once the body is read.
        with request.urlopen(req) as response:
            raw_response = response.read()
    except Exception as e:
        # Chain the original error so its traceback is not lost.
        raise Exception(
            "Failed to obtain resources via url request. "
            f"Exception thrown:\n{e}"
        ) from e

    json_response = json.loads(raw_response.decode("utf-8"))

    # Checked explicitly rather than with `assert`, which is stripped when
    # Python runs with `-O`.
    if "documents" not in json_response:
        raise Exception(
            "'documents' not a key in "
            "json dictionary. JSON: \n"
            f"{json_response}"
        )

    return json_response["documents"]
|
||||
|
||||
|
||||
def save_resources_to_file(resources: List[Dict], output: Path) -> None:
    """
    This function saves all the JSON objects for resources to a file.

    The file is named ``resources.json`` and is written inside ``output``
    with a 4-space indent. An existing file of that name is overwritten.

    :param resources: List of JSON objects for resources
    :param output: Output directory path (a ``Path``; a plain string path is
                   also accepted)
    """

    # Normalise to a Path so that string paths work as well.
    path = Path(output).joinpath("resources.json")

    with open(path, "w", encoding="utf-8") as f:
        json.dump(resources, f, indent=4)
|
||||
|
||||
|
||||
def get_resource_local_filepath(
    resource_id: str, resource_version: str, base_output_path: Path
) -> Path:
    """
    This function returns the local filepath for a resource.

    The file is named ``<resource_id>-<resource_version>`` and is placed
    directly inside ``base_output_path``. Nothing is created on disk.

    :param resource_id: ID of the resource
    :param resource_version: Version of the resource
    :param base_output_path: Base output directory path
    :return: Local filepath for the resource
    """
    return Path(base_output_path) / f"{resource_id}-{resource_version}"
|
||||
|
||||
|
||||
def _md5_of_file(file_path: Path) -> str:
    """
    Return the hexadecimal MD5 digest of the file at ``file_path``.

    The file is hashed in fixed-size chunks so that large downloads (e.g.
    disk images) are never loaded into memory in one piece.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def download_resources(resources: List[Dict], output: Path):
    """
    This function downloads the resources which have a url field and
    updates the url field to point to the local download of the resource.

    Files are stored in an ``offline_resources`` directory inside
    ``output``. A file is (re-)downloaded when it does not exist locally, or
    when the resource has a ``url-md5sum`` field that does not match the MD5
    of the local file. Resources without a ``url`` field are left untouched.

    :param resources: List of JSON objects for resources. Modified in place:
                      each ``url`` field is replaced with a file URI.
    :param output: Output directory path
    """

    path = Path(output).joinpath("offline_resources")
    path.mkdir(parents=True, exist_ok=True)

    for resource in resources:
        if "url" not in resource:
            continue

        url = resource["url"]
        filepath = get_resource_local_filepath(
            resource["id"], resource["resource_version"], path
        )

        # Download if the file is missing, or if it exists but its md5sum
        # does not match the one recorded for the resource.
        needs_download = not filepath.exists() or (
            "url-md5sum" in resource
            and _md5_of_file(filepath) != resource["url-md5sum"]
        )
        if needs_download:
            print(f"Downloading {url} to {filepath}")
            request.urlretrieve(url, filepath)

        # Point the entry at the local copy, whether it was just downloaded
        # or already present.
        resource["url"] = filepath.absolute().as_uri()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point: read the gem5 config, fetch every resource entry from the
    # database, download the remote files and write resources.json.
    parser = argparse.ArgumentParser(
        description="Download all the remote gem5 Resources data and files "
        "to be cached and used locally."
    )

    parser.add_argument(
        "--config-file-path",
        type=str,
        # Defaults to the config file that sits next to this script.
        default=Path(__file__)
        .resolve()
        .parent.joinpath("gem5_default_config.json"),
        help="Filepath to the gem5 config file",
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default=Path.cwd(),
        help="Output directory path, default is the cwd. "
        "The resources.json and all resources will be saved in this directory",
    )
    args = parser.parse_args()

    output_path = Path(args.output_dir)

    # Get the gem5 config file from the file
    with open(args.config_file_path) as f:
        gem5_config_json = json.load(f)
    gem5_config_gem5_resources = gem5_config_json["sources"]["gem5-resources"]

    # Parse the gem5 config
    db_url = gem5_config_gem5_resources["url"]
    data_source = gem5_config_gem5_resources["dataSource"]
    collection_name = gem5_config_gem5_resources["collection"]
    db_name = gem5_config_gem5_resources["database"]
    auth_url = gem5_config_gem5_resources["authUrl"]
    api_key = gem5_config_gem5_resources["apiKey"]

    # `Path.mkdir` returns None, so its result must not be assigned back to
    # `output_path`; `exist_ok=True` makes a prior existence check
    # unnecessary.
    output_path.mkdir(parents=True, exist_ok=True)

    token = get_token(auth_url, api_key)

    resources = get_all_resources(
        db_url,
        data_source,
        collection_name,
        db_name,
        token,
    )

    # Downloads the remote files and rewrites each resource's url in place,
    # so it must run before the resources are saved.
    download_resources(resources, output_path)

    save_resources_to_file(
        resources,
        output_path,
    )
|
||||
Reference in New Issue
Block a user