diff options
author | Frederick Muriuki Muriithi | 2024-02-05 06:08:19 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2024-02-05 06:08:19 +0300 |
commit | 635cf832f4717da6e8e7ef273a675a4ceea42ed0 (patch) | |
tree | a62c9b0e5cc7132fa5cf373ea52982f957f05b94 /r_qtl | |
parent | 650938cbdac6d5f0e779275082577543ab1c0e47 (diff) | |
download | gn-uploader-635cf832f4717da6e8e7ef273a675a4ceea42ed0.tar.gz |
Retrieve list of all files, and list of missing files
Add a QC function to list all files listed in the control file, and
another to list only the files missing from the bundle.
Diffstat (limited to 'r_qtl')
-rw-r--r-- | r_qtl/r_qtl2_qc.py | 47 |
1 files changed, 47 insertions, 0 deletions
# r_qtl/r_qtl2_qc.py (new file introduced by this commit)
"""Quality control checks for R/qtl2 data bundles."""
from zipfile import ZipFile
from functools import reduce
from typing import Union, Sequence

from r_qtl import r_qtl2 as rqtl2
from r_qtl.r_qtl2 import __FILE_TYPES__


def __as_files__(value: Union[str, Sequence[str]]) -> tuple[str, ...]:
    """Normalise a control-file entry (one name or several) into a tuple."""
    # A bare string is a single file name; it must not be split into
    # characters by `tuple(...)`.
    return (value,) if isinstance(value, str) else tuple(value)


def __section_files__(cdata: dict, section: str) -> tuple[str, ...]:
    """Return the file(s) named under `cdata[section]["file"]`, if any."""
    entry = cdata.get(section)
    # Only a mapping can carry a "file" key.  Testing `"file" in entry` on a
    # string would be a substring check, and on `None` it would raise.
    if not isinstance(entry, dict) or "file" not in entry:
        return ()
    return __as_files__(entry["file"])


def bundle_files_list(zfile, cdata: dict) -> tuple[str, ...]:
    """
    Retrieve files listed in control file.

    `zfile` is accepted for interface compatibility and is not used here.
    `cdata` is the parsed control data.

    The result lists, in order: the files given under every top-level key of
    `cdata` that is one of `__FILE_TYPES__`, then the "file" entry of the
    "sex" section, then the "file" entry of the "cross_info" section.  Each
    entry may be a single file name or a sequence of file names.
    """
    listed = reduce(
        lambda acc, key: acc + __as_files__(cdata[key]),
        (key for key in cdata if key in __FILE_TYPES__),
        tuple())

    return (listed
            + __section_files__(cdata, "sex")
            + __section_files__(cdata, "cross_info"))


def missing_files(zfile: ZipFile) -> tuple[str, ...]:
    """
    Retrieve a list of files listed in the control file that do not exist in the
    bundle.
    """
    def __missing_p__(thefile):
        # `ZipFile.getinfo` raises KeyError for names absent from the archive.
        try:
            zfile.getinfo(thefile)
        except KeyError:
            return True
        return False

    return tuple(filter(__missing_p__,
                        bundle_files_list(zfile, rqtl2.control_data(zfile))))