about summary refs log tree commit diff
path: root/gn3/file_utils.py
diff options
context:
space:
mode:
authorBonfaceKilz2021-05-11 17:00:26 +0300
committerBonfaceKilz2021-05-13 11:18:57 +0300
commit378d0fc7f4ff5df5e8e77617c37bcef2b26ddf02 (patch)
tree59b6170f45c0857b0a7ba643e1746fdf5fc4c67a /gn3/file_utils.py
parent53f27b547e7220d46bdc2e92debb38a8739e511c (diff)
downloadgenenetwork3-378d0fc7f4ff5df5e8e77617c37bcef2b26ddf02.tar.gz
Rename file_utils to fs_helpers
Generally avoid naming things with a "utils" prefix/suffix, since it
encourages contributors to dump any new functions there; over time, as the
code grows, things get messy.
Diffstat (limited to 'gn3/file_utils.py')
-rw-r--r--gn3/file_utils.py98
1 files changed, 0 insertions, 98 deletions
diff --git a/gn3/file_utils.py b/gn3/file_utils.py
deleted file mode 100644
index 73f6567..0000000
--- a/gn3/file_utils.py
+++ /dev/null
@@ -1,98 +0,0 @@
-"""Procedures that operate on files/ directories"""
-import hashlib
-import json
-import os
-import random
-import string
-import tarfile
-import pathlib
-
-from functools import partial
-from typing import Dict
-from typing import List
-from werkzeug.utils import secure_filename
-
-import ipfshttpclient
-
-
def get_hash_of_files(files: List[str]) -> str:
    """Compute a combined MD5 digest over FILES.

    The files are visited in sorted order, so the result does not
    depend on the ordering of the input list.  For each file, every
    4096-byte chunk's MD5 hex digest is folded into an outer MD5
    accumulator, and the accumulator's hex digest is returned.

    Raises FileNotFoundError when any listed path does not exist.
    """
    combined = hashlib.md5()
    for path in sorted(files):
        if not os.path.exists(path):
            raise FileNotFoundError
        with open(path, "rb") as stream:
            while True:
                chunk = stream.read(4096)
                if not chunk:
                    break
                # Fold the chunk's hex digest (as UTF-8 bytes) into the
                # accumulator -- this mirrors the historical scheme, so
                # existing stored hashes remain valid.
                combined.update(hashlib.md5(chunk).hexdigest().encode("utf-8"))
    return combined.hexdigest()
-
-
def get_dir_hash(directory: str) -> str:
    """Return a checksum over the contents of every file under DIRECTORY.

    Walks the tree rooted at DIRECTORY (file names sorted within each
    directory) and delegates the hashing to ``get_hash_of_files``.

    Raises FileNotFoundError when DIRECTORY does not exist.
    """
    if not os.path.exists(directory):
        raise FileNotFoundError
    paths = []
    for root, _, file_names in os.walk(directory):
        for file_name in sorted(file_names):
            paths.append(os.path.join(root, file_name))
    return get_hash_of_files(paths)
-
-
def jsonfile_to_dict(json_file: str) -> Dict:
    """Parse JSON_FILE and return its contents as a python dict.

    Raises FileNotFoundError (propagated from ``open``) when the file
    does not exist, and ``json.JSONDecodeError`` on malformed content.
    """
    # The original had an unreachable `raise FileNotFoundError` after
    # this block; `open` already raises it for a missing file.
    with open(json_file) as _file:
        return json.load(_file)
-
-
def generate_random_n_string(n_length: int) -> str:
    """Build a random string, N_LENGTH characters long, drawn from
    uppercase ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.digits
    return "".join([random.choice(alphabet) for _ in range(n_length)])
-
-
def extract_uploaded_file(gzipped_file,
                          target_dir: str,
                          token: str = "") -> Dict:
    """Save GZIPPED_FILE under TARGET_DIR/<token> and extract it there.

    gzipped_file: an uploaded-file object (werkzeug FileStorage-like)
        exposing ``filename`` and ``save`` -- TODO confirm against callers.
    target_dir: base directory for extraction.
    token: sub-directory name to extract into; a random
        "XXXXXX-XXXXXX" token is generated when empty.

    Returns ``{"status": 0, "token": token}`` on success, or
    ``{"status": 128, "error": ...}`` when saving or extraction fails.
    """
    if not token:
        token = (f"{generate_random_n_string(6)}-"
                 f"{generate_random_n_string(6)}")
    extraction_dir = os.path.join(target_dir, token)
    tar_target_loc = os.path.join(extraction_dir,
                                  secure_filename(gzipped_file.filename))
    try:
        # makedirs(exist_ok=True) avoids the exists/mkdir race the
        # original had, and also creates target_dir when it is missing.
        os.makedirs(extraction_dir, exist_ok=True)
        gzipped_file.save(tar_target_loc)
        # Context manager guarantees the archive is closed even when
        # extraction raises (the original leaked the handle on error).
        # NOTE(security): extractall trusts member paths; a malicious
        # archive could escape extraction_dir (path traversal).
        with tarfile.open(tar_target_loc) as tar:
            tar.extractall(path=extraction_dir)
    # pylint: disable=W0703
    except Exception:
        return {"status": 128, "error": "gzip failed to unpack file"}
    return {"status": 0, "token": token}
-
-
def cache_ipfs_file(ipfs_file: str,
                    cache_dir: str,
                    ipfs_addr: str = "/ip4/127.0.0.1/tcp/5001") -> str:
    """Fetch IPFS_FILE into CACHE_DIR unless it is already cached there.

    The cache location is CACHE_DIR joined with the part of IPFS_FILE
    after the last "ipfs/" segment.  When the file is absent, it is
    downloaded via the IPFS daemon at IPFS_ADDR into the cache
    location's parent directory.

    Returns the local path of the cached file.
    """
    relative_name = ipfs_file.split("ipfs/")[-1]
    cached_path = os.path.join(cache_dir, relative_name)
    if not os.path.exists(cached_path):
        client = ipfshttpclient.connect(ipfs_addr)
        client.get(ipfs_file,
                   target=str(pathlib.Path(cached_path).parent))
    return cached_path