diff options
-rw-r--r-- | gn3/file_utils.py | 20 | ||||
-rw-r--r-- | tests/unit/test_file_utils.py | 43 |
2 files changed, 62 insertions, 1 deletions
def cache_ipfs_file(ipfs_file: str,
                    cache_dir: str,
                    ipfs_addr: str = "/ip4/127.0.0.1/tcp/5001") -> str:
    """Return the cached location of IPFS_FILE, fetching it on a cache miss.

    IPFS_FILE is an IPFS path such as "/ipfs/<hash>/genotype.txt".  Whatever
    follows its last "ipfs/" is mirrored below CACHE_DIR, and that location
    is returned.  If nothing exists there yet, the file is first downloaded
    through the IPFS HTTP API listening on the multiaddr IPFS_ADDR.

    Raise ValueError if IPFS_FILE would resolve to a location outside
    CACHE_DIR (an absolute remainder, or ".." components).

    """
    file_loc = os.path.join(cache_dir, ipfs_file.split("ipfs/")[-1])
    # An absolute remainder or ".." components would let a caller read, or
    # make IPFS write, files outside of the cache; refuse those outright.
    cache_root = os.path.realpath(cache_dir)
    if os.path.commonpath([cache_root,
                           os.path.realpath(file_loc)]) != cache_root:
        raise ValueError(f"{ipfs_file} does not resolve inside {cache_dir}")
    if not os.path.exists(file_loc):
        client = ipfshttpclient.connect(ipfs_addr)
        # `get` takes the directory to download into and names the file
        # itself, so pass the parent directory rather than FILE_LOC;
        # passing FILE_LOC would leave the download at FILE_LOC/<name>.
        client.get(ipfs_file, target=os.path.dirname(file_loc))
    return file_loc