# NOTE: web-viewer navigation residue ("about summary refs log tree commit diff") removed from this line.
"""Test that the QC functions work as expected"""
from pathlib import Path

import pytest
from zipfile import ZipFile

from r_qtl import r_qtl2 as rqtl2
from r_qtl import r_qtl2_qc as rqc
from r_qtl.fileerrors import MissingFile

from quality_control.errors import InvalidValue

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        # Expected value is an empty tuple for both empty-control-file bundles.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip", ()),
        ("tests/r_qtl/test_files/empty_control_file_json.zip", ()),
        (
            "tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
            (
                ("geno", "geno.csv"),
                ("founder_geno", "fgeno.csv"),
                ("pheno", "pheno.csv"),
                ("covar", "covar.csv"),
                ("phenocovar", "phenocovar.csv"),
                ("gmap", "gmap.csv"),
                ("pmap", "pmap.csv"),
                ("phenose", "phenose.csv"),
                ("sex.file", "sex.csv"),
                ("cross_info.file", "crossinfo.csv"),
            ),
        ),
        (
            "tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
            (
                ("geno", "geno01.csv"), ("geno", "geno02.csv"),
                ("founder_geno", "fgeno01.csv"),
                ("founder_geno", "fgeno02.csv"),
                ("founder_geno", "fgeno03.csv"),
                ("pheno", "pheno01.csv"), ("pheno", "pheno02.csv"),
                ("covar", "covar01.csv"), ("covar", "covar02.csv"),
                ("phenocovar", "phenocovar01.csv"),
                ("phenocovar", "phenocovar02.csv"),
                ("phenocovar", "phenocovar03.csv"),
                ("phenocovar", "phenocovar04.csv"),
                ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"),
                ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
                ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
                ("sex.file", "sex01.csv"),
                ("sex.file", "sex02.csv"),
                ("sex.file", "sex03.csv"),
                ("cross_info.file", "crossinfo01.csv"),
                ("cross_info.file", "crossinfo02.csv"),
            ),
        ),
        (
            "tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
            (
                ("geno", "geno01.csv"), ("geno", "geno02.csv"),
                ("founder_geno", "fgeno01.csv"),
                ("founder_geno", "fgeno02.csv"),
                ("founder_geno", "fgeno03.csv"),
                ("pheno", "pheno01.csv"), ("pheno", "pheno02.csv"),
                ("covar", "covar.csv"),
                ("phenocovar", "phenocovar.csv"),
                ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"),
                ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
                ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
                ("sex.file", "sex01.csv"),
                ("sex.file", "sex02.csv"),
                ("sex.file", "sex03.csv"),
                ("cross_info.file", "crossinfo.csv"),
            ),
        ),
        (
            # Same expected listing as the "allfilesmissing_mixedmembers"
            # case above.
            "tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
            (
                ("geno", "geno01.csv"), ("geno", "geno02.csv"),
                ("founder_geno", "fgeno01.csv"),
                ("founder_geno", "fgeno02.csv"),
                ("founder_geno", "fgeno03.csv"),
                ("pheno", "pheno01.csv"), ("pheno", "pheno02.csv"),
                ("covar", "covar.csv"),
                ("phenocovar", "phenocovar.csv"),
                ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"),
                ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
                ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
                ("sex.file", "sex01.csv"),
                ("sex.file", "sex02.csv"),
                ("sex.file", "sex03.csv"),
                ("cross_info.file", "crossinfo.csv"),
            ),
        ),
    ))
def test_bundle_files_list(filepath, expected):
    """
    GIVEN: The path to an R/qtl2 zip bundle, and the `(key, filename)` pairs
        its control file is expected to list
    WHEN: the bundle's control data is handed to `bundle_files_list`
    THEN: the returned value compares equal to the expected pairs.
    """
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        control = rqtl2.control_data(bundle)
        listed_files = rqc.bundle_files_list(control)
        assert listed_files == expected

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        # Expected value is an empty tuple for both empty-control-file bundles.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip", ()),
        ("tests/r_qtl/test_files/empty_control_file_json.zip", ()),
        (
            "tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
            (
                ("geno", "geno.csv"),
                ("founder_geno", "fgeno.csv"),
                ("pheno", "pheno.csv"),
                ("covar", "covar.csv"),
                ("phenocovar", "phenocovar.csv"),
                ("gmap", "gmap.csv"),
                ("pmap", "pmap.csv"),
                ("phenose", "phenose.csv"),
                ("sex.file", "sex.csv"),
                ("cross_info.file", "crossinfo.csv"),
            ),
        ),
        (
            "tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
            (
                ("geno", "geno01.csv"), ("geno", "geno02.csv"),
                ("founder_geno", "fgeno01.csv"),
                ("founder_geno", "fgeno02.csv"),
                ("founder_geno", "fgeno03.csv"),
                ("pheno", "pheno01.csv"), ("pheno", "pheno02.csv"),
                ("covar", "covar01.csv"), ("covar", "covar02.csv"),
                ("phenocovar", "phenocovar01.csv"),
                ("phenocovar", "phenocovar02.csv"),
                ("phenocovar", "phenocovar03.csv"),
                ("phenocovar", "phenocovar04.csv"),
                ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"),
                ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
                ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
                ("sex.file", "sex01.csv"),
                ("sex.file", "sex02.csv"),
                ("sex.file", "sex03.csv"),
                ("cross_info.file", "crossinfo01.csv"),
                ("cross_info.file", "crossinfo02.csv"),
            ),
        ),
        (
            "tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
            (
                ("geno", "geno01.csv"), ("geno", "geno02.csv"),
                ("founder_geno", "fgeno01.csv"),
                ("founder_geno", "fgeno02.csv"),
                ("founder_geno", "fgeno03.csv"),
                ("pheno", "pheno01.csv"), ("pheno", "pheno02.csv"),
                ("covar", "covar.csv"),
                ("phenocovar", "phenocovar.csv"),
                ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"),
                ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
                ("phenose", "phenose01.csv"), ("phenose", "phenose02.csv"),
                ("sex.file", "sex01.csv"),
                ("sex.file", "sex02.csv"),
                ("sex.file", "sex03.csv"),
                ("cross_info.file", "crossinfo.csv"),
            ),
        ),
        (
            # Only a subset of the listed files is expected back here.
            "tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
            (
                ("founder_geno", "fgeno01.csv"),
                ("covar", "covar.csv"),
                ("gmap", "gmap01.csv"), ("gmap", "gmap02.csv"),
                ("pmap", "pmap01.csv"), ("pmap", "pmap02.csv"),
                ("phenose", "phenose02.csv"),
                ("sex.file", "sex03.csv"),
                ("cross_info.file", "crossinfo.csv"),
            ),
        ),
    ))
def test_missing_files(filepath, expected):
    """
    GIVEN: The path to an R/qtl2 zip bundle, and the `(key, filename)` pairs
        expected to be reported as missing from it
    WHEN: `missing_files` is called with the opened bundle
    THEN: the returned value compares equal to the expected pairs, i.e. the
        files named in the control file that are absent from the bundle.
    """
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        absent_files = rqc.missing_files(bundle)
        assert absent_files == expected

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (("tests/r_qtl/test_files/empty_control_file_yaml.zip",
      (MissingFile("geno", None, "Missing 'geno' file 'None'."),)),
     ("tests/r_qtl/test_files/test_geno.zip",
      tuple()),
     ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip",
      # NOTE(review): the positional arguments look like
      # (file key, row, column, offending value, message) -- confirm against
      # `quality_control.errors.InvalidValue`.
      (InvalidValue("geno", 1, "AXR-1", "X",
                    "Invalid value 'X'. Expected one of ('L', 'C')."),
       InvalidValue("geno", 2, "EC.480C", "Y",
                    "Invalid value 'Y'. Expected one of ('L', 'C')."),
       # Plain string literal: there is nothing to interpolate here, so the
       # message is written the same way as the two above.
       InvalidValue("geno", 6, "HH.335C-Col/PhyA", "H",
                    "Invalid value 'H'. Expected one of ('L', 'C').")))))
def test_geno_errors(filepath, expected):
    """
    GIVEN: The path to an R/qtl2 zip bundle, and the errors expected from it
    WHEN: `r_qtl.r_qtl2_qc.geno_errors(..)` is called with the opened bundle
    THEN: collecting its results into a tuple gives exactly the expected
        errors, in order -- an empty tuple when no errors are expected.
    """
    with ZipFile(Path(filepath).absolute(), "r") as zfile:
        # `tuple(...)` materialises whatever iterable `geno_errors` yields so
        # it can be compared against the expected tuple.
        assert tuple(rqc.geno_errors(zfile)) == expected

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        (
            "tests/r_qtl/test_files/empty_control_file_yaml.zip",
            (MissingFile("pheno", None, "Missing 'pheno' file 'None'."),),
        ),
        ("tests/r_qtl/test_files/pheno_without_errors.zip", ()),
        (
            "tests/r_qtl/test_files/pheno_with_errors.zip",
            (
                InvalidValue(
                    "pheno", 1, "liver", "61.92",
                    "Invalid value '61.92'. Expected numerical value "
                    "with at least 3 decimal places."),
                InvalidValue(
                    "pheno", 2, "spleen", "brrr",
                    "Invalid value 'brrr'. Expected numerical value "
                    "with at least 3 decimal places."),
            ),
        ),
    ))
def test_pheno_errors(filepath, expected):
    """
    GIVEN: The path to an R/qtl2 zip bundle, and the errors expected from it
    WHEN: `pheno_errors` is called with the opened bundle
    THEN: collecting its results into a tuple gives exactly the expected
        errors, in order -- an empty tuple when no errors are expected.
    """
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        found_errors = tuple(rqc.pheno_errors(bundle))
        assert found_errors == expected