about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-02-09 03:10:06 +0300
committer Frederick Muriuki Muriithi 2024-02-12 18:17:38 +0300
commit dd369b846524fed0c08d1b7318fd73478506c3ee (patch)
tree 7dd4969c327c2affb95e93a492ab904126ea33e4
parent d02cef83c3c0b3f3098df1a7e7eeaf90430f784a (diff)
download gn-uploader-dd369b846524fed0c08d1b7318fd73478506c3ee.tar.gz
Provide the key for each file listed in the control file.
-rw-r--r-- r_qtl/r_qtl2_qc.py 32
-rw-r--r-- tests/r_qtl/test_r_qtl2_qc.py 98
2 files changed, 83 insertions, 47 deletions
diff --git a/r_qtl/r_qtl2_qc.py b/r_qtl/r_qtl2_qc.py
index 4b3e184..a41c442 100644
--- a/r_qtl/r_qtl2_qc.py
+++ b/r_qtl/r_qtl2_qc.py
@@ -2,7 +2,7 @@
 import re
 from zipfile import ZipFile
 from functools import reduce
-from typing import Union, Sequence, Iterator, Optional, Callable
+from typing import Union, Iterator, Optional, Callable
 
 from r_qtl import errors as rqe
 from r_qtl import r_qtl2 as rqtl2
@@ -11,14 +11,15 @@ from r_qtl.fileerrors import MissingFile
 
 from quality_control.errors import InvalidValue
 
-def bundle_files_list(cdata: dict) -> tuple[str, ...]:
+def bundle_files_list(cdata: dict) -> tuple[tuple[str, str], ...]:
     """Retrieve files listed in control file."""
-    def __merge__(alist: tuple[str, ...], member: Union[str, Sequence[str]]) -> tuple[str, ...]:
+    def __merge__(alist: tuple[tuple[str, str], ...], member: str) -> tuple[
+            tuple[str, str], ...]:
         if isinstance(cdata[member], str):
-            return alist + (cdata[member],)
-        return alist + tuple(cdata[member])
+            return alist + ((member, str(cdata[member])),)
+        return alist + tuple((member, str(afile)) for afile in cdata[member])
 
-    fileslist: tuple[str, ...] = reduce(
+    fileslist: tuple[tuple[str, str], ...] = reduce(
         __merge__,
         (key for key in cdata.keys() if key in __FILE_TYPES__),
         tuple())
@@ -26,30 +27,33 @@ def bundle_files_list(cdata: dict) -> tuple[str, ...]:
     if "file" in cdata.get("sex", {}):
         sexfile = cdata["sex"]["file"]
         fileslist = fileslist + (
-            (sexfile,) if isinstance(sexfile, str) else tuple(sexfile))
+            (("sex.file", sexfile),) if isinstance(sexfile, str)
+            else tuple(("sex.file", afile) for afile in sexfile))
 
     if "file" in cdata.get("cross_info", {}):
         crossinfofile = cdata["cross_info"]["file"]
         fileslist = fileslist + (
-            (crossinfofile,) if isinstance(crossinfofile, str)
-            else tuple(crossinfofile))
+            (("cross_info.file", crossinfofile),)
+            if isinstance(crossinfofile, str)
+            else tuple(("cross_info.file", afile) for afile in crossinfofile))
 
     return fileslist
 
-def missing_files(zfile: ZipFile) -> tuple[str, ...]:
+def missing_files(zfile: ZipFile) -> tuple[tuple[str, str], ...]:
     """
     Retrieve a list of files listed in the control file that do not exist in the
     bundle.
     """
-    def __missing_p__(thefile):
+    def __missing_p__(filedetails: tuple[str, str]):
+        _cfkey, thefile = filedetails
         try:
             zfile.getinfo(thefile)
             return False
         except KeyError:
             return True
 
-    return tuple(filter(__missing_p__,
-                        bundle_files_list(rqtl2.control_data(zfile))))
+    return tuple(afile for afile in bundle_files_list(rqtl2.control_data(zfile))
+                 if __missing_p__(afile))
 
 def validate_bundle(zfile: ZipFile):
     """Ensure the R/qtl2 bundle is valid."""
@@ -57,7 +61,7 @@ def validate_bundle(zfile: ZipFile):
     if len(missing) > 0:
         raise rqe.MissingFileError(
                         "The following files do not exist in the bundle: " +
-                        ", ".join(missing))
+                        ", ".join(mfile[1] for mfile in missing))
 
 def make_genocode_checker(genocode: dict) -> Callable[[int, str, str], Optional[InvalidValue]]:
     """Make a checker from the genotypes in the control data"""
diff --git a/tests/r_qtl/test_r_qtl2_qc.py b/tests/r_qtl/test_r_qtl2_qc.py
index 554cfc4..d12172e 100644
--- a/tests/r_qtl/test_r_qtl2_qc.py
+++ b/tests/r_qtl/test_r_qtl2_qc.py
@@ -18,27 +18,44 @@ from quality_control.errors import InvalidValue
      ("tests/r_qtl/test_files/empty_control_file_json.zip",
       tuple()),
      ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
-      ("geno.csv", "fgeno.csv", "pheno.csv", "covar.csv", "phenocovar.csv",
-       "gmap.csv", "pmap.csv", "phenose.csv", "sex.csv", "crossinfo.csv")),
+      (("geno", "geno.csv"), ("founder_geno", "fgeno.csv"),
+       ("pheno", "pheno.csv"), ("covar", "covar.csv"),
+       ("phenocovar", "phenocovar.csv"), ("gmap", "gmap.csv"),
+       ("pmap", "pmap.csv"), ("phenose", "phenose.csv"),
+       ("sex.file", "sex.csv"), ("cross_info.file", "crossinfo.csv"))),
      ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
-      ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
-       "pheno01.csv", "pheno02.csv", "covar01.csv", "covar02.csv",
-       "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv",
-       "phenocovar04.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv",
-       "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv",
-       "crossinfo01.csv", "crossinfo02.csv")),
+      (("geno", "geno01.csv"), ("geno", "geno02.csv"),
+       ("founder_geno", "fgeno01.csv"), ("founder_geno", "fgeno02.csv"),
+       ("founder_geno", "fgeno03.csv"), ("pheno", "pheno01.csv"),
+       ("pheno", "pheno02.csv"), ("covar", "covar01.csv"),
+       ("covar", "covar02.csv"), ("phenocovar", "phenocovar01.csv"),
+       ("phenocovar", "phenocovar02.csv"), ("phenocovar", "phenocovar03.csv"),
+       ("phenocovar", "phenocovar04.csv"), ("gmap", "gmap01.csv"),
+       ("gmap", "gmap02.csv"), ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
+       ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
+       ("sex.file", "sex01.csv"), ("sex.file", "sex02.csv"),
+       ("sex.file", "sex03.csv"), ("cross_info.file", "crossinfo01.csv"),
+       ("cross_info.file", "crossinfo02.csv"))),
      ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
-      ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
-       "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv",
-       "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv",
-       "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv",
-       "crossinfo.csv")),
+      (("geno", "geno01.csv"), ("geno", "geno02.csv"),
+       ("founder_geno", "fgeno01.csv"), ("founder_geno", "fgeno02.csv"),
+       ("founder_geno", "fgeno03.csv"), ("pheno", "pheno01.csv"),
+       ("pheno", "pheno02.csv"), ("covar", "covar.csv"),
+       ("phenocovar", "phenocovar.csv"), ("gmap", "gmap01.csv"),
+       ("gmap", "gmap02.csv"), ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
+       ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
+       ("sex.file", "sex01.csv"), ("sex.file", "sex02.csv"),
+       ("sex.file", "sex03.csv"), ("cross_info.file", "crossinfo.csv"))),
      ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
-      ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
-       "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv",
-       "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv",
-       "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv",
-       "crossinfo.csv"))))
+      (("geno", "geno01.csv"), ("geno", "geno02.csv"),
+       ("founder_geno", "fgeno01.csv"), ("founder_geno", "fgeno02.csv"),
+       ("founder_geno", "fgeno03.csv"), ("pheno", "pheno01.csv"),
+       ("pheno", "pheno02.csv"), ("covar", "covar.csv"),
+       ("phenocovar", "phenocovar.csv"), ("gmap", "gmap01.csv"),
+       ("gmap", "gmap02.csv"), ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
+       ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
+       ("sex.file", "sex01.csv"), ("sex.file", "sex02.csv"),
+       ("sex.file", "sex03.csv"), ("cross_info.file", "crossinfo.csv")))))
 def test_bundle_files_list(filepath, expected):
     """
     GIVEN: R/qtl2 bundle with a control file listing files
@@ -56,24 +73,39 @@ def test_bundle_files_list(filepath, expected):
      ("tests/r_qtl/test_files/empty_control_file_json.zip",
       tuple()),
      ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
-      ("geno.csv", "fgeno.csv", "pheno.csv", "covar.csv", "phenocovar.csv",
-       "gmap.csv", "pmap.csv", "phenose.csv", "sex.csv", "crossinfo.csv")),
+      (("geno", "geno.csv"), ("founder_geno", "fgeno.csv"),
+       ("pheno", "pheno.csv"), ("covar", "covar.csv"),
+       ("phenocovar", "phenocovar.csv"), ("gmap", "gmap.csv"),
+       ("pmap", "pmap.csv"), ("phenose", "phenose.csv"),
+       ("sex.file", "sex.csv"), ("cross_info.file", "crossinfo.csv"))),
      ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
-      ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
-       "pheno01.csv", "pheno02.csv", "covar01.csv", "covar02.csv",
-       "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv",
-       "phenocovar04.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv",
-       "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv",
-       "crossinfo01.csv", "crossinfo02.csv")),
+      (("geno", "geno01.csv"), ("geno", "geno02.csv"),
+       ("founder_geno", "fgeno01.csv"), ("founder_geno", "fgeno02.csv"),
+       ("founder_geno", "fgeno03.csv"), ("pheno", "pheno01.csv"),
+       ("pheno", "pheno02.csv"), ("covar", "covar01.csv"),
+       ("covar", "covar02.csv"), ("phenocovar", "phenocovar01.csv"),
+       ("phenocovar", "phenocovar02.csv"), ("phenocovar", "phenocovar03.csv"),
+       ("phenocovar", "phenocovar04.csv"), ("gmap", "gmap01.csv"),
+       ("gmap", "gmap02.csv"), ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
+       ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
+       ("sex.file", "sex01.csv"), ("sex.file", "sex02.csv"),
+       ("sex.file", "sex03.csv"), ("cross_info.file", "crossinfo01.csv"),
+       ("cross_info.file", "crossinfo02.csv"))),
      ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
-      ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
-       "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv",
-       "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv",
-       "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv",
-       "crossinfo.csv")),
+      (("geno", "geno01.csv"), ("geno", "geno02.csv"),
+       ("founder_geno", "fgeno01.csv"), ("founder_geno", "fgeno02.csv"),
+       ("founder_geno", "fgeno03.csv"), ("pheno", "pheno01.csv"),
+       ("pheno", "pheno02.csv"), ("covar", "covar.csv"),
+       ("phenocovar", "phenocovar.csv"), ("gmap", "gmap01.csv"),
+       ("gmap", "gmap02.csv"), ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
+       ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
+       ("sex.file", "sex01.csv"), ("sex.file", "sex02.csv"),
+       ("sex.file", "sex03.csv"), ("cross_info.file", "crossinfo.csv"))),
      ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
-      ("fgeno01.csv", "covar.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv",
-       "pmap02.csv", "phenose02.csv", "sex03.csv", "crossinfo.csv"))))
+      (("founder_geno", "fgeno01.csv"), ("covar", "covar.csv"),
+       ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"), ("pmap", "pmap01.csv"),
+       ("pmap", "pmap02.csv"), ("phenose", "phenose02.csv"),
+       ("sex.file", "sex03.csv"), ("cross_info.file", "crossinfo.csv")))))
 def test_missing_files(filepath, expected):
     """
     GIVEN: R/qtl2 bundle with a control file listing files