"""Test that the QC functions work as expected""" from pathlib import Path import pytest from zipfile import ZipFile from r_qtl import r_qtl2 as rqtl2 from r_qtl import r_qtl2_qc as rqc from r_qtl.fileerrors import MissingFile from quality_control.errors import InvalidValue @pytest.mark.unit_test @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/empty_control_file_yaml.zip", tuple()), ("tests/r_qtl/test_files/empty_control_file_json.zip", tuple()), ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip", ("geno.csv", "fgeno.csv", "pheno.csv", "covar.csv", "phenocovar.csv", "gmap.csv", "pmap.csv", "phenose.csv", "sex.csv", "crossinfo.csv")), ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip", ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", "pheno01.csv", "pheno02.csv", "covar01.csv", "covar02.csv", "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv", "phenocovar04.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", "crossinfo01.csv", "crossinfo02.csv")), ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip", ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", "crossinfo.csv")), ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip", ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", "crossinfo.csv")))) def test_bundle_files_list(filepath, expected): """ GIVEN: R/qtl2 bundle with a control file listing files WHEN: `bundle_files_list` is called on the bundle THEN: verify that ALL files listed in the control file are returned. """ with ZipFile(Path(filepath).absolute(), "r") as zfile: assert rqc.bundle_files_list(rqtl2.control_data(zfile)) == expected @pytest.mark.unit_test @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/empty_control_file_yaml.zip", tuple()), ("tests/r_qtl/test_files/empty_control_file_json.zip", tuple()), ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip", ("geno.csv", "fgeno.csv", "pheno.csv", "covar.csv", "phenocovar.csv", "gmap.csv", "pmap.csv", "phenose.csv", "sex.csv", "crossinfo.csv")), ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip", ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", "pheno01.csv", "pheno02.csv", "covar01.csv", "covar02.csv", "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv", "phenocovar04.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", "crossinfo01.csv", "crossinfo02.csv")), ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip", ("geno01.csv", "geno02.csv", "fgeno01.csv", "fgeno02.csv", "fgeno03.csv", "pheno01.csv", "pheno02.csv", "covar.csv", "phenocovar.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose01.csv", "phenose02.csv", "sex01.csv", "sex02.csv", "sex03.csv", "crossinfo.csv")), ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip", ("fgeno01.csv", "covar.csv", "gmap01.csv", "gmap02.csv", "pmap01.csv", "pmap02.csv", "phenose02.csv", "sex03.csv", "crossinfo.csv")))) def test_missing_files(filepath, expected): """ GIVEN: R/qtl2 bundle with a control file listing files WHEN: `missing_files` is called on the bundle THEN: verify that ALL files listed in the control file, that do not actually exist in the bundle are returned. """ with ZipFile(Path(filepath).absolute(), "r") as zfile: assert rqc.missing_files(zfile) == expected @pytest.mark.unit_test @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/empty_control_file_yaml.zip", (MissingFile("geno", None, "Missing 'geno' file 'None'."),)), ("tests/r_qtl/test_files/test_geno.zip", tuple()), ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip", (InvalidValue(1, "AXR-1", "X", "Invalid value 'X'. Expected one of ('L', 'C')."), InvalidValue(2, "EC.480C", "Y", "Invalid value 'Y'. Expected one of ('L', 'C')."), InvalidValue(6, "HH.335C-Col/PhyA", "H", f"Invalid value 'H'. Expected one of ('L', 'C')."))))) def test_geno_errors(filepath, expected): """ GIVEN: A R/qtl2 bundle WHEN: We call r_qtl.r_qtl2_qc.geno_errors(..) on it THEN: We should get a sequence of all errors present in the file, or an empty sequence if no errors exist. """ with ZipFile(Path(filepath).absolute(), "r") as zfile: assert tuple(rqc.geno_errors(zfile)) == expected @pytest.mark.unit_test @pytest.mark.parametrize( "filepath,expected", (("tests/r_qtl/test_files/empty_control_file_yaml.zip", (MissingFile("pheno", None, "Missing 'pheno' file 'None'."),)), ("tests/r_qtl/test_files/pheno_without_errors.zip", tuple()), ("tests/r_qtl/test_files/pheno_with_errors.zip", (InvalidValue(1, "liver", "61.92", ("Invalid value '61.92'. Expected numerical value " "with at least 3 decimal places.")), InvalidValue(2, "spleen", "brrr", ("Invalid value 'brrr'. Expected numerical value " "with at least 3 decimal places.")))))) def test_pheno_errors(filepath, expected): """ GIVEN: A R/qtl2 bundle WHEN: we check for pheno errors THEN: We should get a sequence of all errors present in the pheno file, or an empty sequence if no errors exist. """ with ZipFile(Path(filepath).absolute(), "r") as zfile: assert tuple(rqc.pheno_errors(zfile)) == expected