about summary refs log tree commit diff
path: root/r_qtl/r_qtl2_qc.py
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2024-02-05 06:08:19 +0300
committerFrederick Muriuki Muriithi2024-02-05 06:08:19 +0300
commit635cf832f4717da6e8e7ef273a675a4ceea42ed0 (patch)
treea62c9b0e5cc7132fa5cf373ea52982f957f05b94 /r_qtl/r_qtl2_qc.py
parent650938cbdac6d5f0e779275082577543ab1c0e47 (diff)
downloadgn-uploader-635cf832f4717da6e8e7ef273a675a4ceea42ed0.tar.gz
Retrieve list of all files, and list of missing files
Add a QC function to list all files listed in the control file, and
another to list only the files missing from the bundle.
Diffstat (limited to 'r_qtl/r_qtl2_qc.py')
-rw-r--r--r_qtl/r_qtl2_qc.py47
1 files changed, 47 insertions, 0 deletions
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
new file mode 100644
index 0000000..f666f40
--- /dev/null
+++ b/r_qtl/r_qtl2_qc.py
@@ -0,0 +1,47 @@
+"""Quality control checks for R/qtl2 data bundles."""
+from zipfile import ZipFile
+from functools import reduce
+from typing import Union, Sequence
+
+from r_qtl import r_qtl2 as rqtl2
+from r_qtl.r_qtl2 import __FILE_TYPES__
+
+def bundle_files_list(zfile, cdata: dict) -> tuple[str, ...]:
+    """
+    Retrieve the names of all files listed in the control data `cdata`.
+
+    Values under the top-level keys in `__FILE_TYPES__` are collected first,
+    then the "file" entries under "sex" and "cross_info", where present. Each
+    value is either a single name or a sequence of names. `zfile` is unused.
+    """
+    def __names__(value: Union[str, Sequence[str]]) -> tuple[str, ...]:
+        # A lone string is one filename; do not split it into characters.
+        return (value,) if isinstance(value, str) else tuple(value)
+
+    def __merge__(alist: tuple[str, ...], member: str) -> tuple[str, ...]:
+        return alist + __names__(cdata[member])
+
+    fileslist = reduce(
+        __merge__, (key for key in cdata if key in __FILE_TYPES__), tuple())
+
+    # "sex" and "cross_info" keep their file(s) under a nested "file" key.
+    for section in ("sex", "cross_info"):
+        if "file" in cdata.get(section, {}):
+            fileslist = fileslist + __names__(cdata[section]["file"])
+
+    return fileslist
+
+def missing_files(zfile: ZipFile) -> tuple[str, ...]:
+    """
+    Retrieve the files listed in the control file that are absent from the
+    bundle.
+
+    The control data is read from `zfile` with `rqtl2.control_data`, and each
+    file it lists is looked up among the members of `zfile`. The result keeps
+    the order in which `bundle_files_list` reports the files.
+    """
+    listed = bundle_files_list(zfile, rqtl2.control_data(zfile))
+    # `ZipFile.namelist` has every member name; a set makes lookups cheap.
+    present = frozenset(zfile.namelist())
+
+    return tuple(thefile for thefile in listed if thefile not in present)