aboutsummaryrefslogtreecommitdiff
path: root/tests/r_qtl/test_r_qtl2_qc.py
blob: 554cfc4ab210757a81bae83766aa483662314712 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""Test that the QC functions work as expected"""
from pathlib import Path

import pytest
from zipfile import ZipFile

from r_qtl import r_qtl2 as rqtl2
from r_qtl import r_qtl2_qc as rqc
from r_qtl.fileerrors import MissingFile

from quality_control.errors import InvalidValue

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    [
        # Control files that list no files at all.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip", ()),
        ("tests/r_qtl/test_files/empty_control_file_json.zip", ()),
        # Expected names are laid out one control-file key per line.
        ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
         ("geno.csv",
          "fgeno.csv",
          "pheno.csv",
          "covar.csv",
          "phenocovar.csv",
          "gmap.csv",
          "pmap.csv",
          "phenose.csv",
          "sex.csv",
          "crossinfo.csv")),
        ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
         ("geno01.csv", "geno02.csv",
          "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
          "pheno01.csv", "pheno02.csv",
          "covar01.csv", "covar02.csv",
          "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv",
          "phenocovar04.csv",
          "gmap01.csv", "gmap02.csv",
          "pmap01.csv", "pmap02.csv",
          "phenose01.csv", "phenose02.csv",
          "sex01.csv", "sex02.csv", "sex03.csv",
          "crossinfo01.csv", "crossinfo02.csv")),
        ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
         ("geno01.csv", "geno02.csv",
          "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
          "pheno01.csv", "pheno02.csv",
          "covar.csv",
          "phenocovar.csv",
          "gmap01.csv", "gmap02.csv",
          "pmap01.csv", "pmap02.csv",
          "phenose01.csv", "phenose02.csv",
          "sex01.csv", "sex02.csv", "sex03.csv",
          "crossinfo.csv")),
        ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
         ("geno01.csv", "geno02.csv",
          "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
          "pheno01.csv", "pheno02.csv",
          "covar.csv",
          "phenocovar.csv",
          "gmap01.csv", "gmap02.csv",
          "pmap01.csv", "pmap02.csv",
          "phenose01.csv", "phenose02.csv",
          "sex01.csv", "sex02.csv", "sex03.csv",
          "crossinfo.csv")),
    ])
def test_bundle_files_list(filepath, expected):
    """
    GIVEN: the path to an R/qtl2 bundle whose control file lists files
    WHEN: the bundle's control data is passed to `bundle_files_list`
    THEN: the result equals the `expected` tuple of file names.
    """
    # The fixture path is relative; `absolute()` anchors it to the current
    # working directory.
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        control = rqtl2.control_data(bundle)
        listed_files = rqc.bundle_files_list(control)
        assert listed_files == expected

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    [
        # Control files that list no files at all.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip", ()),
        ("tests/r_qtl/test_files/empty_control_file_json.zip", ()),
        # Expected names are laid out one control-file key per line.
        ("tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
         ("geno.csv",
          "fgeno.csv",
          "pheno.csv",
          "covar.csv",
          "phenocovar.csv",
          "gmap.csv",
          "pmap.csv",
          "phenose.csv",
          "sex.csv",
          "crossinfo.csv")),
        ("tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
         ("geno01.csv", "geno02.csv",
          "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
          "pheno01.csv", "pheno02.csv",
          "covar01.csv", "covar02.csv",
          "phenocovar01.csv", "phenocovar02.csv", "phenocovar03.csv",
          "phenocovar04.csv",
          "gmap01.csv", "gmap02.csv",
          "pmap01.csv", "pmap02.csv",
          "phenose01.csv", "phenose02.csv",
          "sex01.csv", "sex02.csv", "sex03.csv",
          "crossinfo01.csv", "crossinfo02.csv")),
        ("tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
         ("geno01.csv", "geno02.csv",
          "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
          "pheno01.csv", "pheno02.csv",
          "covar.csv",
          "phenocovar.csv",
          "gmap01.csv", "gmap02.csv",
          "pmap01.csv", "pmap02.csv",
          "phenose01.csv", "phenose02.csv",
          "sex01.csv", "sex02.csv", "sex03.csv",
          "crossinfo.csv")),
        # Only a subset of the listed files is expected back for this bundle.
        ("tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
         ("fgeno01.csv",
          "covar.csv",
          "gmap01.csv", "gmap02.csv",
          "pmap01.csv", "pmap02.csv",
          "phenose02.csv",
          "sex03.csv",
          "crossinfo.csv")),
    ])
def test_missing_files(filepath, expected):
    """
    GIVEN: the path to an R/qtl2 bundle whose control file lists files
    WHEN: `missing_files` is called on the opened bundle
    THEN: the result equals `expected`, i.e. the files listed in the control
        file that do not actually exist in the bundle.
    """
    # The fixture path is relative; `absolute()` anchors it to the current
    # working directory.
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        absent_files = rqc.missing_files(bundle)
        assert absent_files == expected

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (("tests/r_qtl/test_files/empty_control_file_yaml.zip",
      (MissingFile("geno", None, "Missing 'geno' file 'None'."),)),
     ("tests/r_qtl/test_files/test_geno.zip",
      tuple()),
     ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip",
      (InvalidValue(
          1, "AXR-1", "X",
          "Invalid value 'X'. Expected one of ('L', 'C')."),
       InvalidValue(
           2, "EC.480C", "Y",
           "Invalid value 'Y'. Expected one of ('L', 'C')."),
       # Plain string literal: the message has no placeholders, so the
       # former `f` prefix was redundant (string value is unchanged).
       InvalidValue(
           6, "HH.335C-Col/PhyA", "H",
           "Invalid value 'H'. Expected one of ('L', 'C').")))))
def test_geno_errors(filepath, expected):
    """
    GIVEN: A R/qtl2 bundle
    WHEN: We call r_qtl.r_qtl2_qc.geno_errors(..) on it
    THEN: We should get a sequence of all errors present in the file, or an
        empty sequence if no errors exist.
    """
    with ZipFile(Path(filepath).absolute(), "r") as zfile:
        # `geno_errors` is materialised with `tuple(...)` so the result can
        # be compared directly against the expected tuple.
        assert tuple(rqc.geno_errors(zfile)) == expected

@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    [
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip",
         (MissingFile("pheno", None, "Missing 'pheno' file 'None'."),)),
        ("tests/r_qtl/test_files/pheno_without_errors.zip", ()),
        ("tests/r_qtl/test_files/pheno_with_errors.zip",
         (InvalidValue(
             1, "liver", "61.92",
             "Invalid value '61.92'. Expected numerical value "
             "with at least 3 decimal places."),
          InvalidValue(
              2, "spleen", "brrr",
              "Invalid value 'brrr'. Expected numerical value "
              "with at least 3 decimal places."))),
    ])
def test_pheno_errors(filepath, expected):
    """
    GIVEN: the path to an R/qtl2 bundle
    WHEN: `pheno_errors` is run on the opened bundle
    THEN: the errors it produces, collected into a tuple, equal `expected`
        (an empty tuple when no errors exist).
    """
    # The fixture path is relative; `absolute()` anchors it to the current
    # working directory.
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        found_errors = tuple(rqc.pheno_errors(bundle))
        assert found_errors == expected