From c687f9460ca071c714cdc9cba62449b59b001de8 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 27 Dec 2023 12:12:33 +0300 Subject: Tests: Test parsing of non-transposed geno files. Check that the parsing of non-transposed geno files works as expected. Leave in failing test for transposed geno files. --- r_qtl/r_qtl2.py | 24 ++-- tests/r_qtl/test_files/test_geno.zip | Bin 0 -> 648 bytes tests/r_qtl/test_files/test_geno_transposed.zip | Bin 0 -> 702 bytes tests/r_qtl/test_r_qtl2.py | 48 ------- tests/r_qtl/test_r_qtl2_geno.py | 182 ++++++++++++++++++++++++ tests/r_qtl/test_r_qtl2_gmap.py | 48 +++++++ 6 files changed, 243 insertions(+), 59 deletions(-) create mode 100644 tests/r_qtl/test_files/test_geno.zip create mode 100644 tests/r_qtl/test_files/test_geno_transposed.zip delete mode 100644 tests/r_qtl/test_r_qtl2.py create mode 100644 tests/r_qtl/test_r_qtl2_geno.py create mode 100644 tests/r_qtl/test_r_qtl2_gmap.py diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index a221f26..e019d99 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -50,17 +50,19 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]: reader = csv.DictReader(filter(lambda line: not line.startswith("#"), io.TextIOWrapper(genofile)), delimiter=cdata.get("sep", ",")) - for row in reader: - yield { - key: thread_op( - value, - # replace genotype codes - lambda val: cdata["genotypes"].get(val, val), - # replace N/A strings - lambda val: (None if val in cdata["na.strings"] else val)) - for key,value - in row.items() - } + if not cdata.get("geno_transposed", False): + for row in reader: + yield { + key: thread_op( + value, + # replace genotype codes + lambda val: cdata["genotypes"].get(val, val), + # replace N/A strings + lambda val: ( + None if val in cdata["na.strings"] else val)) + for key,value + in row.items() + } def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> dict: """Read gmap files to get the genome mapping data""" diff --git a/tests/r_qtl/test_files/test_geno.zip 
b/tests/r_qtl/test_files/test_geno.zip new file mode 100644 index 0000000..9cac5f9 Binary files /dev/null and b/tests/r_qtl/test_files/test_geno.zip differ diff --git a/tests/r_qtl/test_files/test_geno_transposed.zip b/tests/r_qtl/test_files/test_geno_transposed.zip new file mode 100644 index 0000000..6ce50f8 Binary files /dev/null and b/tests/r_qtl/test_files/test_geno_transposed.zip differ diff --git a/tests/r_qtl/test_r_qtl2.py b/tests/r_qtl/test_r_qtl2.py deleted file mode 100644 index 33ff5e5..0000000 --- a/tests/r_qtl/test_r_qtl2.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Test the parsing of the R/qtl2 data files.""" -from pathlib import Path - -import pytest -from zipfile import ZipFile - -from r_qtl import r_qtl2 as rqtl2 - -@pytest.mark.unit_test -@pytest.mark.parametrize( - "relpath,mapfiletype,expected", - (("tests/r_qtl/test_files/test_gmap.zip", - "genetic-map", - ({"marker": "PVV4", "chr": "1", "pos": "0.000000"}, - {"marker": "AXR-1", "chr": "1", "pos": "6.250674"}, - {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, - {"marker": "EC.480C", "chr": "1", "pos": "12.577629"}, - {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})), - ("tests/r_qtl/test_files/test_gmap_transposed.zip", - "genetic-map", - ({"marker": "PVV4", "chr": "1", "pos": "0.000000"}, - {"marker": "AXR-1", "chr": "1", "pos": "6.250674"}, - {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, - {"marker": "EC.480C", "chr": "1", "pos": "12.577629"}, - {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})), - ("tests/r_qtl/test_files/test_pmap.zip", - "physical-map", - ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"}, - {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"}, - {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"}, - {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"}, - {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})), - ("tests/r_qtl/test_files/test_pmap_transposed.zip", - "physical-map", - ({"marker": "D1Mit18", "chr": "1", "pos": 
"52.418656"}, - {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"}, - {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"}, - {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"}, - {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})))) -def test_parse_map_files(relpath, mapfiletype, expected): - """ - GIVEN: A path to a zip file, `relpath` and the type of the map file, - WHEN: we parse the R/qtl2 map file, - THEN: ensure the parsed data is as expected. - """ - with ZipFile(Path(relpath).absolute(), "r") as zfile: - assert rqtl2.map_data( - zfile, mapfiletype, rqtl2.control_data(zfile)) == expected diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py new file mode 100644 index 0000000..5ebb5a9 --- /dev/null +++ b/tests/r_qtl/test_r_qtl2_geno.py @@ -0,0 +1,182 @@ +"""Test the parsing of the R/qtl2 geno files.""" +from pathlib import Path + +import pytest +from zipfile import ZipFile + +from r_qtl import r_qtl2 as rqtl2 + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "relpath,expected", + ( + ("tests/r_qtl/test_files/test_geno.zip", + ({ + "id": "1", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "2", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "3", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": None, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "4", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "5", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + }, + { + "id": "6", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + }, + { + "id": "7", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "8", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "9", + "PVV4": None, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + 
"EC.480C": 2, + "EC.66C": 2 + }, + { + "id": "10", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + })), + ("tests/r_qtl/test_files/test_geno_transposed.zip", + ({ + "id": "1", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "2", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "3", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": None, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "4", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "5", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + }, + { + "id": "6", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + }, + { + "id": "7", + "PVV4": 1, + "AXR-1": 1, + "HH.335C-Col/PhyA": 1, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "8", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 1, + "EC.66C": 1 + }, + { + "id": "9", + "PVV4": None, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + }, + { + "id": "10", + "PVV4": 2, + "AXR-1": 2, + "HH.335C-Col/PhyA": 2, + "EC.480C": 2, + "EC.66C": 2 + })))) +def test_parse_geno_files(relpath,expected): + """ + GIVEN: Path to a zip file with R/qtl2 data + WHEN: we parse the geno file + THEN: ensure that the data we get is as expected + """ + with ZipFile(Path(relpath).absolute(), "r") as zfile: + assert tuple(rqtl2.genotype_data(zfile, rqtl2.control_data(zfile))) == expected diff --git a/tests/r_qtl/test_r_qtl2_gmap.py b/tests/r_qtl/test_r_qtl2_gmap.py new file mode 100644 index 0000000..64774c2 --- /dev/null +++ b/tests/r_qtl/test_r_qtl2_gmap.py @@ -0,0 +1,48 @@ +"""Test the parsing of the R/qtl2 gmap files.""" +from pathlib import Path + +import pytest +from zipfile import ZipFile + +from r_qtl import r_qtl2 as rqtl2 + +@pytest.mark.unit_test +@pytest.mark.parametrize( + 
"relpath,mapfiletype,expected", + (("tests/r_qtl/test_files/test_gmap.zip", + "genetic-map", + ({"marker": "PVV4", "chr": "1", "pos": "0.000000"}, + {"marker": "AXR-1", "chr": "1", "pos": "6.250674"}, + {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, + {"marker": "EC.480C", "chr": "1", "pos": "12.577629"}, + {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})), + ("tests/r_qtl/test_files/test_gmap_transposed.zip", + "genetic-map", + ({"marker": "PVV4", "chr": "1", "pos": "0.000000"}, + {"marker": "AXR-1", "chr": "1", "pos": "6.250674"}, + {"marker": "HH.335C-Col/PhyA", "chr": "1", "pos": "9.303868"}, + {"marker": "EC.480C", "chr": "1", "pos": "12.577629"}, + {"marker": "EC.66C", "chr": "1", "pos": "18.392830"})), + ("tests/r_qtl/test_files/test_pmap.zip", + "physical-map", + ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"}, + {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"}, + {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"}, + {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"}, + {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})), + ("tests/r_qtl/test_files/test_pmap_transposed.zip", + "physical-map", + ({"marker": "D1Mit18", "chr": "1", "pos": "52.418656"}, + {"marker": "D1Mit80", "chr": "1", "pos": "86.377953"}, + {"marker": "D1Mit17", "chr": "1", "pos": "189.571337"}, + {"marker": "D2Mit379", "chr": "2", "pos": "37.451062"}, + {"marker": "D2Mit75", "chr": "2", "pos": "80.584782"})))) +def test_parse_map_files(relpath, mapfiletype, expected): + """ + GIVEN: A path to a zip file, `relpath` and the type of the map file, + WHEN: we parse the R/qtl2 map file, + THEN: ensure the parsed data is as expected. + """ + with ZipFile(Path(relpath).absolute(), "r") as zfile: + assert rqtl2.map_data( + zfile, mapfiletype, rqtl2.control_data(zfile)) == expected -- cgit v1.2.3