aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBonfaceKilz2021-02-12 15:18:21 +0300
committerBonfaceKilz2021-02-12 15:36:41 +0300
commit21a4a847456fde5fcc6072df0d0fc36992da283d (patch)
treef6921e2e740f3ce3f528258044aad1dbf938bd1e
parentedd18bffdb179db75769b5a47b9258e5eded5aaf (diff)
downloadgenenetwork3-21a4a847456fde5fcc6072df0d0fc36992da283d.tar.gz
Add function for computing the hash of a directory
-rw-r--r--gn3/__init__.py0
-rw-r--r--gn3/file_utils.py20
-rw-r--r--tests/__init__.py0
-rw-r--r--tests/unit/__init__.py0
-rw-r--r--tests/unit/test_data/metadata.json9
-rw-r--r--tests/unit/test_data/phenotype.tsv21
-rw-r--r--tests/unit/test_file_utils.py20
7 files changed, 70 insertions, 0 deletions
diff --git a/gn3/__init__.py b/gn3/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gn3/__init__.py
diff --git a/gn3/file_utils.py b/gn3/file_utils.py
new file mode 100644
index 0000000..8e342c9
--- /dev/null
+++ b/gn3/file_utils.py
@@ -0,0 +1,20 @@
+"""Procedures that operate on files and directories"""
+import hashlib
+import os
+
+from functools import partial
+
+
+def get_dir_hash(directory: str) -> str:
+    """Return an MD5 hex digest summarising the files under DIRECTORY.
+
+    The tree is walked top-down with sub-directories and file names visited
+    in sorted order, so the same contents give the same digest regardless of
+    the order in which the file system lists its entries.  Each file is read
+    in 4096-byte chunks; the hex digest of every chunk is fed into one
+    running MD5 hash.  File and directory names are not hashed themselves;
+    they only determine the visiting order.
+
+    Args:
+        directory: Path of the directory to hash.
+
+    Returns:
+        The hexadecimal MD5 digest as a string.
+
+    Raises:
+        FileNotFoundError: If DIRECTORY does not exist.
+    """
+    if not os.path.exists(directory):
+        raise FileNotFoundError(f"No such directory: {directory}")
+    md5hash = hashlib.md5()
+    for root, dirs, files in os.walk(directory):
+        # os.walk yields entries in arbitrary (file-system) order; sorting
+        # `dirs` in place fixes the order of descent, and sorting `files`
+        # fixes the order within each directory.
+        dirs.sort()
+        for name in sorted(files):
+            file_path = os.path.join(root, name)
+            with open(file_path, "rb") as file_:
+                for buf in iter(partial(file_.read, 4096), b''):
+                    md5hash.update(bytearray(hashlib.md5(buf).hexdigest(),
+                                             "utf-8"))
+    return md5hash.hexdigest()
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/__init__.py
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tests/unit/__init__.py
diff --git a/tests/unit/test_data/metadata.json b/tests/unit/test_data/metadata.json
new file mode 100644
index 0000000..d7dd267
--- /dev/null
+++ b/tests/unit/test_data/metadata.json
@@ -0,0 +1,9 @@
+{
+ "title": "This is my dataset for testing the REST API",
+ "description": "Longer description",
+ "date": "20210127",
+ "authors": [
+ "R. W. Williams"
+ ],
+ "cross": "BXD"
+}
diff --git a/tests/unit/test_data/phenotype.tsv b/tests/unit/test_data/phenotype.tsv
new file mode 100644
index 0000000..b9115ce
--- /dev/null
+++ b/tests/unit/test_data/phenotype.tsv
@@ -0,0 +1,21 @@
+ pheno
+BXD01 5.060
+BXD02 307.866
+BXD03 185.400
+BXD04 380.729
+BXD05 150.066
+BXD06 94.483
+BXD07 438.700
+BXD08 NA
+BXD09 130.457
+BXD10 184.900
+BXD11 223.400
+BXD12 167.250
+BXD13 313.950
+BXD14 219.383
+BXD15 277.800
+BXD16 6.467
+BXD17 364.967
+BXD18 132.016
+BXD19 468.133
+BXD20 309.500
diff --git a/tests/unit/test_file_utils.py b/tests/unit/test_file_utils.py
new file mode 100644
index 0000000..e6109bb
--- /dev/null
+++ b/tests/unit/test_file_utils.py
@@ -0,0 +1,20 @@
+"""Test cases for procedures defined in file_utils.py"""
+import os
+import unittest
+
+from gn3.file_utils import get_dir_hash
+
+
+class TestFileUtils(unittest.TestCase):
+    """Unit tests for the helpers exposed by gn3.file_utils"""
+
+    def test_get_dir_hash(self):
+        """The fixture directory next to this file hashes to a known digest"""
+        here = os.path.dirname(__file__)
+        fixture_dir = os.path.join(here, "test_data")
+        expected_digest = "928a0e2e4846b4b3c2881d9c1d6cfce4"
+        self.assertEqual(expected_digest, get_dir_hash(fixture_dir))
+
+    def test_get_dir_hash_non_existent_dir(self):
+        """A path that does not exist makes get_dir_hash raise an error"""
+        with self.assertRaises(FileNotFoundError):
+            get_dir_hash("/non-existent-file")