aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2023-12-27 12:12:33 +0300
committerFrederick Muriuki Muriithi2023-12-27 12:14:53 +0300
commitc687f9460ca071c714cdc9cba62449b59b001de8 (patch)
tree48ad3343cbea9dddaa0e374a38cd11cb38c26fe5
parent3294adc11ada56046ff748c08b97faf0eeed11b2 (diff)
downloadgn-uploader-c687f9460ca071c714cdc9cba62449b59b001de8.tar.gz
Tests: Test parsing of non-transposed geno files.
Check that the parsing of non-transposed geno files works as expected. Leave in the failing test for transposed geno files.
-rw-r--r--r_qtl/r_qtl2.py24
-rw-r--r--tests/r_qtl/test_files/test_geno.zipbin0 -> 648 bytes
-rw-r--r--tests/r_qtl/test_files/test_geno_transposed.zipbin0 -> 702 bytes
-rw-r--r--tests/r_qtl/test_r_qtl2_geno.py182
-rw-r--r--tests/r_qtl/test_r_qtl2_gmap.py (renamed from tests/r_qtl/test_r_qtl2.py)2
5 files changed, 196 insertions, 12 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index a221f26..e019d99 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -50,17 +50,19 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
reader = csv.DictReader(filter(lambda line: not line.startswith("#"),
io.TextIOWrapper(genofile)),
delimiter=cdata.get("sep", ","))
- for row in reader:
- yield {
- key: thread_op(
- value,
- # replace genotype codes
- lambda val: cdata["genotypes"].get(val, val),
- # replace N/A strings
- lambda val: (None if val in cdata["na.strings"] else val))
- for key,value
- in row.items()
- }
+ if not cdata.get("geno_transposed", False):
+ for row in reader:
+ yield {
+ key: thread_op(
+ value,
+ # replace genotype codes
+ lambda val: cdata["genotypes"].get(val, val),
+ # replace N/A strings
+ lambda val: (
+ None if val in cdata["na.strings"] else val))
+ for key,value
+ in row.items()
+ }
def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> dict:
"""Read gmap files to get the genome mapping data"""
diff --git a/tests/r_qtl/test_files/test_geno.zip b/tests/r_qtl/test_files/test_geno.zip
new file mode 100644
index 0000000..9cac5f9
--- /dev/null
+++ b/tests/r_qtl/test_files/test_geno.zip
Binary files differ
diff --git a/tests/r_qtl/test_files/test_geno_transposed.zip b/tests/r_qtl/test_files/test_geno_transposed.zip
new file mode 100644
index 0000000..6ce50f8
--- /dev/null
+++ b/tests/r_qtl/test_files/test_geno_transposed.zip
Binary files differ
diff --git a/tests/r_qtl/test_r_qtl2_geno.py b/tests/r_qtl/test_r_qtl2_geno.py
new file mode 100644
index 0000000..5ebb5a9
--- /dev/null
+++ b/tests/r_qtl/test_r_qtl2_geno.py
@@ -0,0 +1,182 @@
+"""Test the parsing of the R/qtl2 geno files."""
+from pathlib import Path
+
+import pytest
+from zipfile import ZipFile
+
+from r_qtl import r_qtl2 as rqtl2
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+ "relpath,expected",
+ (
+ ("tests/r_qtl/test_files/test_geno.zip",
+ ({
+ "id": "1",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "2",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "3",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": None,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "4",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "5",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ },
+ {
+ "id": "6",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ },
+ {
+ "id": "7",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "8",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "9",
+ "PVV4": None,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ },
+ {
+ "id": "10",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ })),
+ ("tests/r_qtl/test_files/test_geno_transposed.zip",
+ ({
+ "id": "1",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "2",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "3",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": None,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "4",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "5",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ },
+ {
+ "id": "6",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ },
+ {
+ "id": "7",
+ "PVV4": 1,
+ "AXR-1": 1,
+ "HH.335C-Col/PhyA": 1,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "8",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 1,
+ "EC.66C": 1
+ },
+ {
+ "id": "9",
+ "PVV4": None,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ },
+ {
+ "id": "10",
+ "PVV4": 2,
+ "AXR-1": 2,
+ "HH.335C-Col/PhyA": 2,
+ "EC.480C": 2,
+ "EC.66C": 2
+ }))))
+def test_parse_geno_files(relpath,expected):
+ """
+ GIVEN: Path to a zip file with R/qtl2 data
+ WHEN: we parse the geno file
+ THEN: ensure that the data we get is as expected
+ """
+ with ZipFile(Path(relpath).absolute(), "r") as zfile:
+ assert tuple(rqtl2.genotype_data(zfile, rqtl2.control_data(zfile))) == expected
diff --git a/tests/r_qtl/test_r_qtl2.py b/tests/r_qtl/test_r_qtl2_gmap.py
index 33ff5e5..64774c2 100644
--- a/tests/r_qtl/test_r_qtl2.py
+++ b/tests/r_qtl/test_r_qtl2_gmap.py
@@ -1,4 +1,4 @@
-"""Test the parsing of the R/qtl2 data files."""
+"""Test the parsing of the R/qtl2 gmap files."""
from pathlib import Path
import pytest