# gn3/file_utils.py -- introduced by commit
# 21a4a847456fde5fcc6072df0d0fc36992da283d
# ("Add function for computing the hash of a directory").
"""Procedures that operate on files/ directories"""
import hashlib
import os

from functools import partial


def get_dir_hash(directory: str) -> str:
    """Return an MD5 hex digest summarising the files under DIRECTORY.

    Every regular file found by walking DIRECTORY is read in 4096-byte
    chunks.  Each chunk is hashed with MD5 and the chunk's hex digest
    (UTF-8 encoded) is fed into one running MD5 object, whose hex digest
    is returned.  Only file *contents* contribute: file names, directory
    names and empty directories do not affect the result.

    Sub-directories and files are visited in sorted name order so that
    the digest does not depend on the order in which the underlying
    filesystem happens to list directory entries.

    Args:
        directory: Path of the directory to hash.

    Returns:
        A 32-character lowercase hexadecimal MD5 digest.  A directory
        that contains no files yields the MD5 digest of empty input.

    Raises:
        FileNotFoundError: If DIRECTORY does not exist.
    """
    if not os.path.exists(directory):
        raise FileNotFoundError(
            "Path does not exist: {}".format(directory))

    md5hash = hashlib.md5()
    for root, dirs, files in os.walk(directory):
        # os.walk (top-down) honours in-place changes to ``dirs``; sorting
        # here fixes the order in which sub-directories are descended into.
        dirs.sort()
        for name in sorted(files):
            file_path = os.path.join(root, name)
            with open(file_path, "rb") as file_:
                # Read in fixed-size chunks so large files are never
                # loaded into memory in one go.
                for buf in iter(partial(file_.read, 4096), b""):
                    chunk_digest = hashlib.md5(buf).hexdigest()
                    md5hash.update(chunk_digest.encode("utf-8"))
    return md5hash.hexdigest()
"""Test cases for procedures defined in file_utils.py"""
import os
import unittest

from gn3.file_utils import get_dir_hash


class TestFileUtils(unittest.TestCase):
    """Unit tests for the helpers exposed by gn3.file_utils."""

    def test_get_dir_hash(self):
        """The bundled ``test_data`` directory hashes to a known digest."""
        # The fixture directory sits next to this test module.
        here = os.path.dirname(__file__)
        test_dir = os.path.join(here, "test_data")
        observed = get_dir_hash(test_dir)
        self.assertEqual("928a0e2e4846b4b3c2881d9c1d6cfce4", observed)

    def test_get_dir_hash_non_existent_dir(self):
        """A FileNotFoundError is raised when the directory does not exist."""
        with self.assertRaises(FileNotFoundError):
            get_dir_hash("/non-existent-file")