about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--r_qtl/r_qtl2_qc.py53
1 files changed, 46 insertions, 7 deletions
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
index 7e93d23..6f7b374 100644
--- a/r_qtl/r_qtl2_qc.py
+++ b/r_qtl/r_qtl2_qc.py
@@ -1,6 +1,7 @@
 """Quality control checks for R/qtl2 data bundles."""
-from zipfile import ZipFile
+from pathlib import Path
 from functools import reduce, partial
+from zipfile import ZipFile, is_zipfile
 from typing import Union, Iterator, Optional, Callable
 
 from r_qtl import r_qtl2 as rqtl2
@@ -39,11 +40,10 @@ def bundle_files_list(cdata: dict) -> tuple[tuple[str, str], ...]:
 
     return fileslist
 
-def missing_files(zfile: ZipFile) -> tuple[tuple[str, str], ...]:
-    """
-    Retrieve a list of files listed in the control file that do not exist in the
-    bundle.
-    """
+
+def __missing_from_zipfile__(
+        zfile: ZipFile, cdata: dict) -> tuple[tuple[str, str], ...]:
+    """Check for missing files from a still-compressed zip file."""
     def __missing_p__(filedetails: tuple[str, str]):
         _cfkey, thefile = filedetails
         try:
@@ -52,9 +52,48 @@ def missing_files(zfile: ZipFile) -> tuple[tuple[str, str], ...]:
         except KeyError:
             return True
 
-    return tuple(afile for afile in bundle_files_list(rqtl2.control_data(zfile))
+    return tuple(afile for afile in bundle_files_list(cdata)
                  if __missing_p__(afile))
 
+
+def __missing_from_dirpath__(
+        dirpath: Path, cdata: dict) -> tuple[tuple[str, str], ...]:
+    """List control-file entries whose file is absent from an extracted bundle."""
+    present = frozenset(entry.name for entry in dirpath.iterdir())  # O(1) lookup
+    return tuple(afile for afile in bundle_files_list(cdata)
+                 if afile[1] not in present)
+
+
+def missing_files(bundlesrc: Union[Path, ZipFile]) -> tuple[tuple[str, str], ...]:
+    """
+    Retrieve a list of files listed in the control file that do not exist in the
+    bundle.
+
+    Parameters
+    ----------
+    bundlesrc: Path object or ZipFile object: This is the bundle under check.
+
+    Returns
+    -------
+    A tuple of names listed in the control file that do not exist in the bundle.
+
+    Raises
+    ------
+    r_qtl.exceptions.InvalidFormat
+    """
+    cdata = rqtl2.control_data(bundlesrc)
+    if isinstance(bundlesrc, ZipFile):
+        return __missing_from_zipfile__(bundlesrc, cdata)
+    if isinstance(bundlesrc, Path) and is_zipfile(bundlesrc):
+        with ZipFile(bundlesrc) as zfile:  # close the archive we opened here
+            return __missing_from_zipfile__(zfile, cdata)
+    if isinstance(bundlesrc, Path) and bundlesrc.is_dir():
+        return __missing_from_dirpath__(bundlesrc, cdata)
+    raise InvalidFormat(
+        "Expects either a zipfile.ZipFile object or a pathlib.Path object "
+        "pointing to a directory containing the R/qtl2 bundle.")
+
+
 def validate_bundle(zfile: ZipFile):
     """Ensure the R/qtl2 bundle is valid."""
     missing = missing_files(zfile)