util: add scripts that help maintain mongoDB

Change-Id: Ie421176782070462bb2a57351a04ba6ae004a9d4
This commit is contained in:
Harshil Patel
2023-12-05 10:50:21 -08:00
parent 569e21f798
commit 3f2a72641b
7 changed files with 440 additions and 0 deletions

View File

@@ -0,0 +1,48 @@
# Overview
This utility contains various scripts that are helpful when maintaining the gem5 resources MongoDB database.
The scripts in this directory use external libraries. Please install the required libraries mentioned in the
`requirements.txt` by running the following command:
```
pip3 install -r requirements.txt
```
## add-json-to-mongo.py
This script adds a list of resources from a JSON file to a specified collection in a MongoDB database. The JSON file should be in the format of a list of dictionaries, where each dictionary represents a resource.
To run this script you use the following command:
```
python3 ./add-json-to-mongo.py --uri <uri> --db_name <db_name> --collection_name <collection_name> --json_file <json_file>
```
## backup-db.py
This script fetches all documents from a specified collection in a MongoDB database and saves them to a JSON file named `resources_backup_<YYYY-MM-DD>.json` (using today's date) in the current working directory.
To run this script, use the following command:
```
python3 ./backup-db.py --uri <uri> --db_name <db_name> --collection_name <collection_name>
```
## create-new-collection.py
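This script copies all documents from a specified collection in a MongoDB database into a new collection in the same database. The `_id` field of each document is not copied, so the documents in the new collection receive their own `_id` values.
To run this script, use the following command: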
```
python3 ./create-new-collection.py --uri <uri> --db_name <db_name> --collection_name <collection_name> --new_collection_name <new_collection_name>
```
## update-gem5-versions.py
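This script fetches all resources from a specified collection in a MongoDB
database and adds a new gem5 version to the `gem5_versions` field of each
resource that does not already list it. Resources whose category is passed with
`--category` are left unchanged; the option may be repeated to exclude several
categories. The updated resources are written to the JSON file given by
`--outfile`; the database itself is not modified.
To run this script, use the following command: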
```
python3 ./update-gem5-versions.py --uri <uri> --db <db_name> --collection <collection_name> --version <version> --category <category> --outfile <outfile>
```
## helper.py
This script contains helper functions for the scripts in this directory.

View File

@@ -0,0 +1,79 @@
#!/usr/bin/env python3
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script adds a list of resources from a JSON file to a specified
collection in a MongoDB database. The JSON file should be in the format
of a list of dictionaries, where each dictionary represents a resource.
To run this script, use the following command (wrapped here for
readability; enter it on a single line):
python3 ./add-json-to-mongo.py --uri <uri> --db_name <db_name>
    --collection_name <collection_name> --json_file <json_file>
"""
import argparse
import json
from helper import get_database
parser = argparse.ArgumentParser(
    description="Add a list of resources from a "
    "JSON file to a specified collection in a "
    "MongoDB database"
)
parser.add_argument(
    "--uri", help="URI of the database", type=str, required=True
)
parser.add_argument(
    "--db_name", help="Name of the database", type=str, default="gem5-vision"
)
parser.add_argument(
    "--collection_name",
    help="Name of the collection",
    type=str,
    default="resources",
)
parser.add_argument(
    "--json_file", help="Name of the json file", type=str, required=True
)


def load_resources(json_file):
    """Read the resources to insert from a JSON file.

    :param json_file: Path of a JSON file that holds a list of
        dictionaries, one dictionary per resource.
    :returns: The non-empty list of resource dictionaries.
    :raises ValueError: If the file is not valid JSON, or if its content
        is not a non-empty list made up only of dictionaries.
    :raises OSError: If the file cannot be opened.
    """
    # Read as UTF-8 explicitly instead of relying on the platform's
    # default text encoding, so non-ASCII resources load everywhere.
    with open(json_file, encoding="utf-8") as f:
        resources = json.load(f)

    # Check the documented file format up front so that a malformed file
    # is reported clearly instead of failing inside the database driver.
    if not isinstance(resources, list) or not resources:
        raise ValueError("expected a non-empty JSON list of resources")
    if not all(isinstance(resource, dict) for resource in resources):
        raise ValueError("every resource in the JSON list must be an object")
    return resources


if __name__ == "__main__":
    args = parser.parse_args()

    # get resources from json file
    try:
        resources = load_resources(args.json_file)
    except ValueError as err:
        # json.JSONDecodeError is a ValueError, so syntax errors in the
        # file are reported here as well. parser.error() exits the script.
        parser.error(f"{args.json_file}: {err}")

    collection = get_database(args.uri, args.db_name, args.collection_name)

    # insert resources into collection
    collection.insert_many(resources)

View File

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script grabs all documents from a specified collection in a MongoDB
database and saves them to a JSON file.
To run this script you use the following command:
python3 ./backup-db.py --uri <uri> --db_name <db_name> --collection_name <collection_name>
"""
import argparse
from datetime import date
from helper import (
get_database,
save_to_json,
)
parser = argparse.ArgumentParser(
    description="Get all documents from a "
    "specified collection in a MongoDB database "
    "and save them to a JSON file"
)
parser.add_argument(
    "--uri", help="URI of the database", type=str, required=True
)
parser.add_argument(
    "--db_name", help="Name of the database", type=str, default="gem5-vision"
)
parser.add_argument(
    "--collection_name",
    help="Name of the collection",
    type=str,
    default="resources",
)


def main():
    """Save every document of the selected collection to a JSON file.

    The command line is parsed with the module-level ``parser``. The
    backup is written to ``resources_backup_<date>.json``, where
    ``<date>`` is today's date; the relative file name means it is
    created in the current working directory.
    """
    args = parser.parse_args()

    collection = get_database(args.uri, args.db_name, args.collection_name)

    # get all documents from the selected collection
    resources = collection.find({})

    # copy all documents from the collection to the dated backup json file
    save_to_json(resources, f"resources_backup_{date.today()}.json")


# Only touch the command line and the database when run as a script, in
# line with the other scripts in this directory.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,87 @@
#!/usr/bin/env python3
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script grabs all documents from a specified collection in a MongoDB
database and creates a new collection with the same documents.
To run this script, use the following command (wrapped here for
readability; enter it on a single line):
python3 ./create-new-collection.py --uri <uri> --db_name <db_name>
    --collection_name <collection_name>
    --new_collection_name <new_collection_name>
"""
import argparse
import json
from bson import json_util
from helper import get_database
parser = argparse.ArgumentParser(
    description="Get all documents from a "
    "specified collection in a MongoDB database "
    "and create a new collection with the same "
    "documents."
)
parser.add_argument(
    "--uri", help="URI of the database", type=str, required=True
)
parser.add_argument(
    "--new_collection_name",
    help="Name of the new collection",
    type=str,
    required=True,
)
parser.add_argument(
    "--db_name", help="Name of the database", type=str, default="gem5-vision"
)
parser.add_argument(
    "--collection_name",
    help="Name of the collection",
    type=str,
    default="resources",
)


def main():
    """Copy every document of one collection into a new collection.

    The command line is parsed with the module-level ``parser``. Both
    collections live in the database selected with ``--db_name``.

    :raises SystemExit: If the source collection holds no documents, in
        which case nothing is copied.
    """
    args = parser.parse_args()

    collection = get_database(args.uri, args.db_name, args.collection_name)

    # get all documents from collection
    # Suppress _id field from being returned
    # as new collection will have its own _id field
    #
    # The documents are handed to insert_many() exactly as they were read.
    # Converting them to JSON text and back would replace BSON-specific
    # values with their JSON representation, so the copy would no longer
    # match the source documents.
    resources = list(collection.find({}, {"_id": 0}))

    # insert_many() needs at least one document, so stop with a readable
    # message instead of a driver error when there is nothing to copy.
    if not resources:
        raise SystemExit(
            f"No documents found in collection '{args.collection_name}'; "
            "nothing was copied."
        )

    new_collection = get_database(
        args.uri, args.db_name, args.new_collection_name
    )
    new_collection.insert_many(resources)


# Only touch the command line and the database when run as a script, in
# line with the other scripts in this directory.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,49 @@
#!/usr/bin/env python3
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
This script contains helper functions for the scripts in this directory.
This script requires external packages, please install them by running:
pip3 install -r requirements.txt
"""
import json
from bson import json_util
from pymongo import MongoClient
def get_database(uri, db_name, collection_name):
    """Return a handle to one collection of a MongoDB database.

    Despite its name, this function returns the collection object, not
    the database object.

    :param uri: URI passed to ``MongoClient`` to reach the database.
    :param db_name: Name of the database that holds the collection.
    :param collection_name: Name of the collection to return.
    :returns: The collection ``collection_name`` of database ``db_name``.
    """
    # Index the client by database name, then the database by collection
    # name, to reach the requested collection.
    return MongoClient(uri)[db_name][collection_name]
def save_to_json(resources, json_file_name):
    """Write ``resources`` to ``json_file_name`` as indented JSON.

    :param resources: The documents to save; anything that
        ``bson.json_util.dumps`` can serialise.
    :param json_file_name: Path of the file to create or overwrite.
    """
    with open(json_file_name, "w") as json_out:
        # Serialise with bson's json_util, then parse the text again with
        # the standard json module, so that json.dump() only receives
        # plain JSON values (presumably the resources may hold
        # BSON-specific types that json.dump() alone cannot write).
        serialised = json_util.dumps(resources)
        plain_resources = json.loads(serialised)
        json.dump(plain_resources, json_out, indent=4)

View File

@@ -0,0 +1,3 @@
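# Third-party packages required by the scripts in this directory.
# NOTE(review): PyMongo ships its own `bson` package (the one providing
# `json_util`); please confirm the separate PyPI `bson` pin below is needed,
# as the PyMongo documentation warns that the two are incompatible.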
bson == 0.5.10
pymongo == 4.3.3

View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
# Copyright (c) 2023 The Regents of the University of California
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""
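This script fetches all resources from a specified collection in a MongoDB
database and adds a new gem5 version to the gem5_versions field of each
resource that does not already list it. Resources whose category is passed
with --category are left unchanged; the option may be repeated. The updated
resources are written to the JSON file given by --outfile; the database
itself is not modified.
To run this script, use the following command (wrapped here for
readability; enter it on a single line):
python3 ./update-gem5-versions.py --uri <uri> --db <db_name>
    --collection <collection_name> --version <version>
    --category <category> --outfile <outfile>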
"""
import argparse
import json
from bson import json_util
from helper import (
get_database,
save_to_json,
)
# Command line interface of the script. The parser is kept at module level
# and only used once the script is actually executed.
parser = argparse.ArgumentParser(
    description="Get all resources from a specified collection "
    "in a MongoDB database and add a new gem5 version "
    "to the gem5_versions field of each resource."
)
parser.add_argument(
    "--uri",
    help="URI of the database",
    type=str,
    required=True,
)
parser.add_argument(
    "--version",
    help="Version to add to gem5_versions",
    type=str,
    required=True,
)
parser.add_argument(
    "--db",
    help="Name of the database",
    type=str,
    default="gem5-vision",
)
parser.add_argument(
    "--collection",
    help="Name of the collection",
    type=str,
    default="resources",
)
# May be given several times; every occurrence names one more category
# whose resources are left untouched.
parser.add_argument(
    "--category",
    help="Category to not update",
    action="append",
)
parser.add_argument(
    "--outfile",
    help="Name of the output json file",
    type=str,
    default="resources_update_gem5_versions.json",
)


def main():
    """Add the requested gem5 version to the resources and save them.

    All documents of the selected collection are read without their
    ``_id`` field. The version is appended to ``gem5_versions`` of every
    resource that is not in an excluded category and does not already list
    it. The full list of resources is then written to the output JSON
    file; nothing is written back to the database.
    """
    args = parser.parse_args()

    # argparse leaves the option at None when it was never given; treat
    # that as "no category is excluded".
    skipped_categories = args.category if args.category is not None else []

    collection = get_database(args.uri, args.db, args.collection)

    # Fetch every document and turn the cursor into plain Python data.
    cursor = collection.find({}, {"_id": 0})
    documents = json.loads(json_util.dumps(cursor))

    for document in documents:
        if document["category"] in skipped_categories:
            continue
        versions = document["gem5_versions"]
        # Never list the same version twice.
        if args.version not in versions:
            versions.append(args.version)

    save_to_json(documents, args.outfile)


if __name__ == "__main__":
    main()