aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gn3/file_utils.py20
-rw-r--r--tests/unit/test_file_utils.py43
2 files changed, 62 insertions, 1 deletions
diff --git a/gn3/file_utils.py b/gn3/file_utils.py
index c1c9d53..4dcf296 100644
--- a/gn3/file_utils.py
+++ b/gn3/file_utils.py
@@ -5,12 +5,14 @@ import os
import random
import string
import tarfile
+
from functools import partial
from typing import Dict
from typing import List
-
from werkzeug.utils import secure_filename
+import ipfshttpclient
+
def get_hash_of_files(files: List[str]) -> str:
"""Given a list of valid of FILES, return their hash as a string"""
@@ -75,3 +77,19 @@ contents to TARGET_DIR/<dir-hash>.
except Exception:
return {"status": 128, "error": "gzip failed to unpack file"}
return {"status": 0, "token": token}
+
+
+def cache_ipfs_file(ipfs_file: str,
+                    cache_dir: str,
+                    ipfs_addr: str = "/ip4/127.0.0.1/tcp/5001") -> str:
+    """Check if a file exists in cache; if it doesn't, cache it.  Return the
+    cached file location.
+
+    IPFS_FILE is an IPFS path, e.g. "/ipfs/<hash>/<file>"; whatever follows
+    the last "ipfs/" in it is used as the file's path relative to CACHE_DIR.
+    The IPFS daemon at IPFS_ADDR is only contacted when that path does not
+    exist yet.
+
+    """
+    file_loc = os.path.join(cache_dir, ipfs_file.split("ipfs/")[-1])
+    if not os.path.exists(file_loc):
+        client = ipfshttpclient.connect(ipfs_addr)
+        # NOTE(review): ipfshttpclient describes `target` as the directory
+        # to download into, so passing the file path may create
+        # FILE_LOC/<name> rather than FILE_LOC -- confirm against the client
+        # version in use.  test_cache_ipfs_file_cache_miss pins the current
+        # call and must change together with it.
+        client.get(ipfs_file, target=file_loc)
+    return file_loc
diff --git a/tests/unit/test_file_utils.py b/tests/unit/test_file_utils.py
index b319693..abe7aa6 100644
--- a/tests/unit/test_file_utils.py
+++ b/tests/unit/test_file_utils.py
@@ -8,6 +8,7 @@ from unittest import mock
from gn3.file_utils import extract_uploaded_file
from gn3.file_utils import get_dir_hash
from gn3.file_utils import jsonfile_to_dict
+from gn3.file_utils import cache_ipfs_file
@dataclass
@@ -19,6 +20,7 @@ class MockFile:
class TestFileUtils(unittest.TestCase):
"""Test cases for procedures defined in file_utils.py"""
+
def test_get_dir_hash(self):
"""Test that a directory is hashed correctly"""
test_dir = os.path.join(os.path.dirname(__file__), "test_data")
@@ -75,3 +77,44 @@ extracting the file"""
"status": 128,
"error": "gzip failed to unpack file"
})
+
+    def test_cache_ipfs_file_cache_hit(self):
+        """Test that the correct file location is returned if there's a cache hit"""
+        test_dir = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test"
+        cached_file = f"{test_dir}/genotype.txt"
+        # Create an empty file at the expected cache location so that the
+        # lookup below is a hit and no IPFS client is needed.
+        os.makedirs(test_dir, exist_ok=True)
+        with open(cached_file, "a", encoding="utf-8"):
+            pass
+        try:
+            file_loc = cache_ipfs_file(
+                ipfs_file=("/ipfs/"
+                           "QmQPeNsJPyVWPFDVHb"
+                           "77w8G42Fvo15z4bG2X8D2GhfbSXc-test/"
+                           "genotype.txt"),
+                cache_dir="/tmp")
+        finally:
+            # Clean up even if the call under test raises; a leftover file
+            # would turn the cache-miss test into a cache hit.
+            os.remove(cached_file)
+            os.rmdir(test_dir)
+        self.assertEqual(file_loc, cached_file)
+
+    @mock.patch("gn3.file_utils.ipfshttpclient")
+    def test_cache_ipfs_file_cache_miss(self,
+                                        mock_ipfs):
+        """Test that a file is cached if there's a cache miss"""
+        mock_ipfs_client = mock.MagicMock()
+        mock_ipfs.connect.return_value = mock_ipfs_client
+
+        test_dir = "/tmp/QmQPeNsJPyVWPFDVHb77w8G42Fvo15z4bG2X8D2GhfbSXc-test"
+        ipfs_file = ("/ipfs/"
+                     "QmQPeNsJPyVWPFDVHb"
+                     "77w8G42Fvo15z4bG2X8D2GhfbSXc-test/"
+                     "genotype.txt")
+        # Force the miss instead of relying on the path being absent from
+        # the real /tmp: if the file exists, the client is never called and
+        # the assertion below fails.
+        with mock.patch("os.path.exists", return_value=False):
+            file_loc = cache_ipfs_file(ipfs_file=ipfs_file,
+                                       cache_dir="/tmp")
+        self.assertEqual(file_loc, f"{test_dir}/genotype.txt")
+        mock_ipfs_client.get.assert_called_once_with(
+            ipfs_file,
+            target=f"{test_dir}/genotype.txt"
+        )