aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBonfaceKilz2021-03-03 11:20:20 +0300
committerBonfaceKilz2021-03-08 21:09:58 +0300
commit3b2b49ba367cf33adc294809feeb917b10f340a0 (patch)
tree18d1cd991e53c7f9a7cef265c0634c248b3b7a12
parentb7a57dc5a97403906ee43e2fecac641611cf5ab8 (diff)
downloadgenenetwork3-3b2b49ba367cf33adc294809feeb917b10f340a0.tar.gz
Create a more generic procedure for getting hash values of files
* gn3/file_utils.py (get_hash_of_files): New procedure. (get_dir_hash): Replace with the more generic "get_hash_of_files", which computes the hash of an explicit list of files instead of walking a directory.
-rw-r--r--gn3/file_utils.py21
1 files changed, 10 insertions, 11 deletions
diff --git a/gn3/file_utils.py b/gn3/file_utils.py
index d5177b0..6b28784 100644
--- a/gn3/file_utils.py
+++ b/gn3/file_utils.py
@@ -8,21 +8,20 @@ import tarfile
from functools import partial
from typing import Dict
+from typing import List
from werkzeug.utils import secure_filename
-def get_dir_hash(directory: str) -> str:
- """Return the hash of a DIRECTORY"""
+def get_hash_of_files(files: List[str]) -> str:
+ """Given a list of valid FILES, return their hash as a string"""
md5hash = hashlib.md5()
- if not os.path.exists(directory):
- raise FileNotFoundError
- for root, _, files in os.walk(directory):
- for names in sorted(files):
- file_path = os.path.join(root, names)
- with open(file_path, "rb") as file_:
- for buf in iter(partial(file_.read, 4096), b''):
- md5hash.update(bytearray(hashlib.md5(buf).hexdigest(),
- "utf-8"))
+ for file_path in sorted(files):
+ if not os.path.exists(file_path):
+ raise FileNotFoundError
+ with open(file_path, "rb") as file_:
+ for buf in iter(partial(file_.read, 4096), b''):
+ md5hash.update(bytearray(hashlib.md5(buf).hexdigest(),
+ "utf-8"))
return md5hash.hexdigest()