diff options
| author | BonfaceKilz | 2021-03-03 11:20:20 +0300 | 
|---|---|---|
| committer | BonfaceKilz | 2021-03-08 21:09:58 +0300 | 
| commit | 3b2b49ba367cf33adc294809feeb917b10f340a0 (patch) | |
| tree | 18d1cd991e53c7f9a7cef265c0634c248b3b7a12 | |
| parent | b7a57dc5a97403906ee43e2fecac641611cf5ab8 (diff) | |
| download | genenetwork3-3b2b49ba367cf33adc294809feeb917b10f340a0.tar.gz | |
Create a more generic procedure for getting hash values of files
* gn3/file_utils.py (get_hash_of_files): New procedure. (get_dir_hash): Replace with the more generic "get_hash_of_files", which hashes an explicit list of files instead of walking a directory.
| -rw-r--r-- | gn3/file_utils.py | 21 | 
1 file changed, 10 insertions, 11 deletions
```diff
diff --git a/gn3/file_utils.py b/gn3/file_utils.py
index d5177b0..6b28784 100644
--- a/gn3/file_utils.py
+++ b/gn3/file_utils.py
@@ -8,21 +8,20 @@ import tarfile
 
 from functools import partial
 from typing import Dict
+from typing import List
 from werkzeug.utils import secure_filename
 
 
-def get_dir_hash(directory: str) -> str:
-    """Return the hash of a DIRECTORY"""
+def get_hash_of_files(files: List[str]) -> str:
+    """Given a list of valid of FILES, return their hash as a string"""
     md5hash = hashlib.md5()
-    if not os.path.exists(directory):
-        raise FileNotFoundError
-    for root, _, files in os.walk(directory):
-        for names in sorted(files):
-            file_path = os.path.join(root, names)
-            with open(file_path, "rb") as file_:
-                for buf in iter(partial(file_.read, 4096), b''):
-                    md5hash.update(bytearray(hashlib.md5(buf).hexdigest(),
-                                             "utf-8"))
+    for file_path in sorted(files):
+        if not os.path.exists(file_path):
+            raise FileNotFoundError
+        with open(file_path, "rb") as file_:
+            for buf in iter(partial(file_.read, 4096), b''):
+                md5hash.update(bytearray(hashlib.md5(buf).hexdigest(),
+                                         "utf-8"))
     return md5hash.hexdigest()
 
 
```
