stdlib: Update the stdlib resource's md5 utils
The commit does the following: - Moves the md5 functions to their own Python module (this will allow us to use them elsewhere). - Adds functionality to compute md5 values for directories. - Adds PyUnit tests for the md5 functionality. Change-Id: I224d4584ed6c35fac3a75e221b3cb48d863ffa6f Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/58849 Reviewed-by: Jason Lowe-Power <power.jg@gmail.com> Maintainer: Jason Lowe-Power <power.jg@gmail.com> Tested-by: kokoro <noreply+kokoro@google.com> Reviewed-by: Bobby Bruce <bbruce@ucdavis.edu> Maintainer: Bobby Bruce <bbruce@ucdavis.edu>
This commit is contained in:
committed by
Bobby Bruce
parent
35f7008602
commit
e33f9b830b
@@ -208,6 +208,7 @@ PySource('gem5.prebuilt.demo', 'gem5/prebuilt/demo/__init__.py')
|
||||
PySource('gem5.prebuilt.demo', 'gem5/prebuilt/demo/x86_demo_board.py')
|
||||
PySource('gem5.resources', 'gem5/resources/__init__.py')
|
||||
PySource('gem5.resources', 'gem5/resources/downloader.py')
|
||||
PySource('gem5.resources', 'gem5/resources/md5_utils.py')
|
||||
PySource('gem5.resources', 'gem5/resources/resource.py')
|
||||
PySource('gem5.utils', 'gem5/utils/__init__.py')
|
||||
PySource('gem5.utils', 'gem5/utils/filelock.py')
|
||||
|
||||
@@ -34,10 +34,13 @@ import hashlib
|
||||
import base64
|
||||
import time
|
||||
import random
|
||||
from pathlib import Path
|
||||
from tempfile import gettempdir
|
||||
from urllib.error import HTTPError
|
||||
from typing import List, Dict
|
||||
|
||||
from .md5_utils import md5_file, md5_dir
|
||||
|
||||
from ..utils.filelock import FileLock
|
||||
|
||||
"""
|
||||
@@ -195,31 +198,6 @@ def _get_resources(resources_group: Dict) -> Dict[str, Dict]:
|
||||
|
||||
return to_return
|
||||
|
||||
|
||||
def _get_md5(file: str) -> str:
    """
    Gets the md5 of a file.

    :param file: The file needing an md5 value.

    :returns: The md5 of the input file, as a hexadecimal string.
    """

    # Note: This code is slightly more complex than you might expect as
    # `hashlib.md5(<file>)` returns malloc errors for large files (such as
    # disk images). The file is therefore fed to the hash in fixed-size
    # chunks rather than being read in one go.
    md5_object = hashlib.md5()
    block_size = 128 * md5_object.block_size

    # The `with` block guarantees the file handle is closed, including when
    # a read raises part-way through the file.
    with open(file, "rb") as a_file:
        # `read` returns b"" at end-of-file, which terminates the iterator.
        for chunk in iter(lambda: a_file.read(block_size), b""):
            md5_object.update(chunk)

    return md5_object.hexdigest()
|
||||
|
||||
|
||||
def _download(
|
||||
url: str,
|
||||
download_to: str,
|
||||
@@ -343,17 +321,20 @@ def get_resource(
|
||||
|
||||
if os.path.exists(to_path):
|
||||
|
||||
if not os.path.isfile(to_path):
|
||||
raise Exception(
|
||||
"There is a directory at '{}'.".format(to_path)
|
||||
)
|
||||
if os.path.isfile(to_path):
|
||||
md5 = md5_file(Path(to_path))
|
||||
else:
|
||||
md5 = md5_dir(Path(to_path))
|
||||
|
||||
if _get_md5(to_path) == resource_json["md5sum"]:
|
||||
if md5 == resource_json["md5sum"]:
|
||||
# In this case, the file has already been downloaded, no need to
|
||||
# do so again.
|
||||
return
|
||||
elif download_md5_mismatch:
|
||||
os.remove(to_path)
|
||||
if os.path.isfile(to_path):
|
||||
os.remove(to_path)
|
||||
else:
|
||||
shutil.rmtree(to_path)
|
||||
else:
|
||||
raise Exception(
|
||||
"There already a file present at '{}' but "
|
||||
|
||||
65
src/python/gem5/resources/md5_utils.py
Normal file
65
src/python/gem5/resources/md5_utils.py
Normal file
@@ -0,0 +1,65 @@
|
||||
# Copyright (c) 2022 The Regents of the University of California
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from pathlib import Path
|
||||
import hashlib
|
||||
from _hashlib import HASH as Hash
|
||||
|
||||
def _md5_update_from_file(filename: Path, hash: Hash) -> Hash:
    """
    Feeds the contents of a file into an existing hash object.

    :param filename: Path of the file to read. It must refer to a regular
                     file.
    :param hash: The hash object to update with the file's bytes.

    :returns: The same hash object that was passed in, after being updated.
    """
    assert filename.is_file()

    # Read in 4096-byte pieces so the whole file is never held in memory.
    read_size = 4096
    with open(str(filename), "rb") as stream:
        block = stream.read(read_size)
        while block != b"":
            hash.update(block)
            block = stream.read(read_size)

    return hash
|
||||
|
||||
def _md5_update_from_dir(directory: Path, hash: Hash) -> Hash:
    """
    Recursively feeds the contents of a directory into an existing hash
    object.

    Entries are visited in case-insensitive path order. For every entry the
    entry's name is hashed first; regular files then contribute their
    contents and sub-directories are descended into. Entries that are
    neither a file nor a directory contribute only their name.

    :param directory: Path of the directory to hash. It must refer to a
                      directory.
    :param hash: The hash object to update.

    :returns: The hash object after being updated with the directory.
    """
    assert directory.is_dir()

    # `iterdir()` yields entries in arbitrary order and `sorted` is stable,
    # so sorting on the lower-cased path alone leaves siblings that differ
    # only by case (e.g. "Readme" and "README") in filesystem-dependent
    # order. The exact path is used as a tie-breaker so the digest is
    # reproducible; the order of all other entries is unchanged.
    entries = sorted(
        directory.iterdir(), key=lambda p: (str(p).lower(), str(p))
    )

    for path in entries:
        hash.update(path.name.encode())
        if path.is_file():
            hash = _md5_update_from_file(path, hash)
        elif path.is_dir():
            hash = _md5_update_from_dir(path, hash)

    return hash
|
||||
|
||||
def md5_file(filename: Path) -> str:
    """
    Computes the md5 hash of a single file.

    :param filename: Path of the file whose md5 is to be calculated.

    :returns: The md5 of the file as a hexadecimal string.
    """
    # Start from a fresh md5 object and let the helper stream the file in.
    file_hash = _md5_update_from_file(filename, hashlib.md5())
    hex_value = file_hash.hexdigest()
    return str(hex_value)
|
||||
|
||||
def md5_dir(directory: Path) -> str:
    """
    Computes a single md5 value representing a whole directory.

    A fresh md5 object is handed to `_md5_update_from_dir`, which folds the
    directory's entries into it.

    Note: Entry names are hashed as well as file contents, so the md5 of the
    directory changes if empty files are added or if files are renamed.

    :param directory: Path of the directory whose md5 is to be calculated.

    :returns: The md5 of the directory as a hexadecimal string.
    """
    dir_hash = _md5_update_from_dir(directory, hashlib.md5())
    hex_value = dir_hash.hexdigest()
    return str(hex_value)
|
||||
Reference in New Issue
Block a user