diff options
author | BonfaceKilz | 2021-02-12 15:18:21 +0300 |
---|---|---|
committer | BonfaceKilz | 2021-02-12 15:36:41 +0300 |
commit | 21a4a847456fde5fcc6072df0d0fc36992da283d (patch) | |
tree | f6921e2e740f3ce3f528258044aad1dbf938bd1e /gn3/file_utils.py | |
parent | edd18bffdb179db75769b5a47b9258e5eded5aaf (diff) | |
download | genenetwork3-21a4a847456fde5fcc6072df0d0fc36992da283d.tar.gz |
Add function for computing the hash of a directory
Diffstat (limited to 'gn3/file_utils.py')
-rw-r--r-- | gn3/file_utils.py | 20 |
1 files changed, 20 insertions, 0 deletions
# diff --git a/gn3/file_utils.py b/gn3/file_utils.py
# new file mode 100644, index 0000000..8e342c9
"""Procedures that operate on files/ directories"""
import hashlib
import os

from functools import partial


def get_dir_hash(directory: str) -> str:
    """Return the MD5 hex digest summarising the file contents of DIRECTORY.

    The tree rooted at DIRECTORY is walked top-down.  Every file is read in
    4096-byte chunks; each chunk is hashed on its own and the hex digest of
    that chunk is fed into one running MD5.  Because chunks are hashed
    individually, the result depends on the chunk size as well as on the
    data.

    Sub-directories and files are visited in sorted name order so that two
    directories with identical contents always produce the same digest,
    regardless of the order in which the filesystem lists their entries.

    Only file contents contribute: file names are never hashed, and an empty
    file yields no chunks and therefore does not affect the result.

    Raises:
        FileNotFoundError: if DIRECTORY does not exist.
    """
    if not os.path.exists(directory):
        raise FileNotFoundError(f"No such directory: {directory!r}")
    md5hash = hashlib.md5()
    for root, dirs, files in os.walk(directory):
        # os.walk reads `dirs` after yielding (top-down mode), so sorting it
        # in place fixes the order in which sub-directories are descended.
        dirs.sort()
        for name in sorted(files):
            file_path = os.path.join(root, name)
            with open(file_path, "rb") as file_:
                # iter(callable, sentinel) keeps reading until read() returns
                # b'' at end of file.
                for buf in iter(partial(file_.read, 4096), b""):
                    chunk_digest = hashlib.md5(buf).hexdigest()
                    md5hash.update(chunk_digest.encode("utf-8"))
    return md5hash.hexdigest()