1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
"""Test that the QC functions work as expected"""
from pathlib import Path
import pytest
from zipfile import ZipFile
from r_qtl import r_qtl2 as rqtl2
from r_qtl import r_qtl2_qc as rqc
@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        # Control files that list nothing yield an empty result.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip", ()),
        ("tests/r_qtl/test_files/empty_control_file_json.zip", ()),
        # Every control-file entry is a single file name.
        (
            "tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
            (
                "geno.csv", "fgeno.csv", "pheno.csv", "covar.csv",
                "phenocovar.csv", "gmap.csv", "pmap.csv", "phenose.csv",
                "sex.csv", "crossinfo.csv",
            ),
        ),
        # Every control-file entry is a list of file names.
        (
            "tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
            (
                "geno01.csv", "geno02.csv",
                "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
                "pheno01.csv", "pheno02.csv",
                "covar01.csv", "covar02.csv",
                "phenocovar01.csv", "phenocovar02.csv",
                "phenocovar03.csv", "phenocovar04.csv",
                "gmap01.csv", "gmap02.csv",
                "pmap01.csv", "pmap02.csv",
                "phenose01.csv", "phenose02.csv",
                "sex01.csv", "sex02.csv", "sex03.csv",
                "crossinfo01.csv", "crossinfo02.csv",
            ),
        ),
        # A mixture of single-name and list entries.
        (
            "tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
            (
                "geno01.csv", "geno02.csv",
                "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
                "pheno01.csv", "pheno02.csv",
                "covar.csv",
                "phenocovar.csv",
                "gmap01.csv", "gmap02.csv",
                "pmap01.csv", "pmap02.csv",
                "phenose01.csv", "phenose02.csv",
                "sex01.csv", "sex02.csv", "sex03.csv",
                "crossinfo.csv",
            ),
        ),
        # The expected listing here is the same as for the bundle above.
        (
            "tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
            (
                "geno01.csv", "geno02.csv",
                "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
                "pheno01.csv", "pheno02.csv",
                "covar.csv",
                "phenocovar.csv",
                "gmap01.csv", "gmap02.csv",
                "pmap01.csv", "pmap02.csv",
                "phenose01.csv", "phenose02.csv",
                "sex01.csv", "sex02.csv", "sex03.csv",
                "crossinfo.csv",
            ),
        ),
    ),
)
def test_bundle_files_list(filepath, expected):
    """
    GIVEN: An R/qtl2 bundle whose control file names its member files
    WHEN: the bundle's control data is handed to `bundle_files_list`
    THEN: the result equals the tuple of ALL files named in the control file.
    """
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        control = rqtl2.control_data(bundle)
        listed = rqc.bundle_files_list(control)
        assert listed == expected
@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        # Control files that list nothing have nothing to report as missing.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip", ()),
        ("tests/r_qtl/test_files/empty_control_file_json.zip", ()),
        (
            "tests/r_qtl/test_files/allfilesmissing_stringmembers.zip",
            (
                "geno.csv", "fgeno.csv", "pheno.csv", "covar.csv",
                "phenocovar.csv", "gmap.csv", "pmap.csv", "phenose.csv",
                "sex.csv", "crossinfo.csv",
            ),
        ),
        (
            "tests/r_qtl/test_files/allfilesmissing_listmembers.zip",
            (
                "geno01.csv", "geno02.csv",
                "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
                "pheno01.csv", "pheno02.csv",
                "covar01.csv", "covar02.csv",
                "phenocovar01.csv", "phenocovar02.csv",
                "phenocovar03.csv", "phenocovar04.csv",
                "gmap01.csv", "gmap02.csv",
                "pmap01.csv", "pmap02.csv",
                "phenose01.csv", "phenose02.csv",
                "sex01.csv", "sex02.csv", "sex03.csv",
                "crossinfo01.csv", "crossinfo02.csv",
            ),
        ),
        (
            "tests/r_qtl/test_files/allfilesmissing_mixedmembers.zip",
            (
                "geno01.csv", "geno02.csv",
                "fgeno01.csv", "fgeno02.csv", "fgeno03.csv",
                "pheno01.csv", "pheno02.csv",
                "covar.csv",
                "phenocovar.csv",
                "gmap01.csv", "gmap02.csv",
                "pmap01.csv", "pmap02.csv",
                "phenose01.csv", "phenose02.csv",
                "sex01.csv", "sex02.csv", "sex03.csv",
                "crossinfo.csv",
            ),
        ),
        # Only a subset of the listed files is expected to be reported here.
        (
            "tests/r_qtl/test_files/somefilesmissing_mixedmembers.zip",
            (
                "fgeno01.csv", "covar.csv", "gmap01.csv", "gmap02.csv",
                "pmap01.csv", "pmap02.csv", "phenose02.csv", "sex03.csv",
                "crossinfo.csv",
            ),
        ),
    ),
)
def test_missing_files(filepath, expected):
    """
    GIVEN: An R/qtl2 bundle whose control file names its member files
    WHEN: `missing_files` is run against the opened bundle
    THEN: the result equals the tuple of ALL files that the control file
          names but that are not actually present in the bundle.
    """
    archive_path = Path(filepath).absolute()
    with ZipFile(archive_path, "r") as archive:
        absent = rqc.missing_files(archive)
        assert absent == expected
@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        # A control file with no 'geno' entry yields one error with no
        # position information.
        ("tests/r_qtl/test_files/empty_control_file_yaml.zip",
         ((None, None, "Missing 'geno' file."),)),
        # A valid geno file yields no errors at all.
        ("tests/r_qtl/test_files/test_geno.zip",
         tuple()),
        # Each expected error is a 3-tuple ending in the message.
        # NOTE(review): the first two items look like a row number and a
        # column/marker name -- confirm against `geno_errors`.
        ("tests/r_qtl/test_files/geno_with_missing_genotypes.zip",
         ((1, "AXR-1", "Invalid value 'X'. Expected one of ('L', 'C')"),
          (2, "EC.480C", "Invalid value 'Y'. Expected one of ('L', 'C')"),
          # Plain literal: the original `f` prefix had no placeholders.
          (6, "HH.335C-Col/PhyA",
           "Invalid value 'H'. Expected one of ('L', 'C')"))),
    ),
)
def test_geno_errors(filepath, expected):
    """
    GIVEN: An R/qtl2 bundle
    WHEN: We call r_qtl.r_qtl2_qc.geno_errors(..) on it
    THEN: We should get a sequence of all errors present in the file, or an
          empty sequence if no errors exist.
    """
    with ZipFile(Path(filepath).absolute(), "r") as zfile:
        # `geno_errors` may return any iterable; materialise it as a tuple so
        # that it can be compared against the expected tuple.
        assert tuple(rqc.geno_errors(zfile)) == expected
@pytest.mark.unit_test
@pytest.mark.parametrize(
    "filepath,expected",
    (
        # A control file with no 'pheno' entry yields one error with no
        # position information.
        (
            "tests/r_qtl/test_files/empty_control_file_yaml.zip",
            ((None, None, "Missing 'pheno' file."),),
        ),
        # A valid pheno file yields no errors at all.
        ("tests/r_qtl/test_files/pheno_without_errors.zip", ()),
        # Both a too-short decimal and a non-numeric value are reported.
        (
            "tests/r_qtl/test_files/pheno_with_errors.zip",
            (
                (
                    1,
                    "liver",
                    ("Invalid value '61.92'. Expected numerical value "
                     "with at least 3 decimal places."),
                ),
                (
                    2,
                    "spleen",
                    ("Invalid value 'brrr'. Expected numerical value "
                     "with at least 3 decimal places."),
                ),
            ),
        ),
    ),
)
def test_pheno_errors(filepath, expected):
    """
    GIVEN: An R/qtl2 bundle
    WHEN: `pheno_errors` is run against the opened bundle
    THEN: the collected errors equal the expected tuple -- every error in the
          pheno file, or an empty tuple if there are none.
    """
    bundle_path = Path(filepath).absolute()
    with ZipFile(bundle_path, "r") as bundle:
        found = tuple(rqc.pheno_errors(bundle))
        assert found == expected
|