From 635cf832f4717da6e8e7ef273a675a4ceea42ed0 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 5 Feb 2024 06:08:19 +0300 Subject: Retrieve list of all files, and list of missing files Add a QC function to list all files listed in the control file, and another to list only the files missing from the bundle. --- .../test_files/allfilesmissing_listmembers.zip | Bin 0 -> 429 bytes .../test_files/allfilesmissing_mixedmembers.zip | Bin 0 -> 413 bytes .../test_files/allfilesmissing_stringmembers.zip | Bin 0 -> 370 bytes .../test_files/somefilesmissing_mixedmembers.zip | Bin 0 -> 1913 bytes tests/r_qtl/test_r_qtl2_qc.py | 87 +++++++++++++++++++++ 5 files changed, 87 insertions(+) create mode 100644 tests/r_qtl/test_files/allfilesmissing_listmembers.zip create mode 100644 tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip create mode 100644 tests/r_qtl/test_files/allfilesmissing_stringmembers.zip create mode 100644 tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip create mode 100644 tests/r_qtl/test_r_qtl2_qc.py (limited to 'tests') diff --git a/tests/r_qtl/test_files/allfilesmissing_listmembers.zip b/tests/r_qtl/test_files/allfilesmissing_listmembers.zip new file mode 100644 index 0000000..8cdbe07 Binary files /dev/null and b/tests/r_qtl/test_files/allfilesmissing_listmembers.zip differ diff --git a/tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip b/tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip new file mode 100644 index 0000000..9278440 Binary files /dev/null and b/tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip differ diff --git a/tests/r_qtl/test_files/allfilesmissing_stringmembers.zip b/tests/r_qtl/test_files/allfilesmissing_stringmembers.zip new file mode 100644 index 0000000..2b356ec Binary files /dev/null and b/tests/r_qtl/test_files/allfilesmissing_stringmembers.zip differ diff --git a/tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip b/tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip new file 
mode 100644 index 0000000..b9320fc Binary files /dev/null and b/tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip differ diff --git a/tests/r_qtl/test_r_qtl2_qc.py b/tests/r_qtl/test_r_qtl2_qc.py new file mode 100644 index 0000000..5fcccf5 --- /dev/null +++ b/tests/r_qtl/test_r_qtl2_qc.py @@ -0,0 +1,87 @@ +"""Test that the QC functions work as expected""" +from pathlib import Path + +import pytest +from zipfile import ZipFile + +from r_qtl import r_qtl2 as rqtl2 +from r_qtl import r_qtl2_qc as qc + +###### DO NOT COMMIT THIS ###### +from quality_control.debug import __pk__ +###### END: DO NOT COMMIT THIS ###### + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "filepath,expected", + (("tests/r_qtl/test_files/empty_control_file_yaml.zip", + tuple()), + ("tests/r_qtl/test_files/empty_control_file_json.zip", + tuple()), + ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip", + ("geno.csv", "fgeno.csv", "pheno.csv", "covar.csv", "phenocovar.csv", + "gmap.csv", "pmap.csv", "phenose.csv", "sex.csv", "crossinfo.csv")), + ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip", + ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", + "pheno01.csv", "pheno02.csv", "covar01.csv", "covar02.csv", + "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv", + "phenocovar04.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", + "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", + "crossinfo01.csv", "crossinfo02.csv")), + ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip", + ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", + "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv", + "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", + "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", + "crossinfo.csv")), + ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip", + ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", + 
"pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv", + "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", + "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", + "crossinfo.csv")))) +def test_bundle_files_list(filepath, expected): + """ + GIVEN: R/qtl2 bundle with a control file listing files + WHEN: `bundle_files_list` is called on the bundle + THEN: verify that ALL files listed in the control file are returned. + """ + with ZipFile(Path(filepath).absolute(), "r") as zfile: + assert qc.bundle_files_list( + zfile, rqtl2.control_data(zfile)) == expected + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "filepath,expected", + (("tests/r_qtl/test_files/empty_control_file_yaml.zip", + tuple()), + ("tests/r_qtl/test_files/empty_control_file_json.zip", + tuple()), + ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip", + ("geno.csv", "fgeno.csv", "pheno.csv", "covar.csv", "phenocovar.csv", + "gmap.csv", "pmap.csv", "phenose.csv", "sex.csv", "crossinfo.csv")), + ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip", + ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", + "pheno01.csv", "pheno02.csv", "covar01.csv", "covar02.csv", + "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv", + "phenocovar04.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", + "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", + "crossinfo01.csv", "crossinfo02.csv")), + ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip", + ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", + "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv", + "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", + "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", + "crossinfo.csv")), + ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip", + ("fgeno01.csv", "covar.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", + "pmap02.csv", "phenose02.csv", 
"sex03.csv", "crossinfo.csv")))) +def test_missing_files(filepath, expected): + """ + GIVEN: R/qtl2 bundle with a control file listing files + WHEN: `missing_files` is called on the bundle + THEN: verify that ALL files listed in the control file, that do not actually + exist in the bundle are returned. + """ + with ZipFile(Path(filepath).absolute(), "r") as zfile: + assert qc.missing_files(zfile) == expected -- cgit v1.2.3