about summary refs log tree commit diff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-04 17:08:47 +0300
committer Frederick Muriuki Muriithi 2024-01-04 17:08:47 +0300
commit 6b2002f236188e3c2571af18642527b156ed60e0 (patch)
tree 27127f01a2868cb07ea271536d724a840df69ee8
parent 9eab14f7f6c40c401c29d11d69ac54303415ab87 (diff)
download gn-uploader-6b2002f236188e3c2571af18642527b156ed60e0.tar.gz
Parse cross information from R/qtl2 bundle.
-rw-r--r-- r_qtl/r_qtl2.py 39
-rw-r--r-- tests/r_qtl/test_files/test_cross_info_01.zip bin 0 -> 597 bytes
-rw-r--r-- tests/r_qtl/test_files/test_cross_info_02.zip bin 0 -> 778 bytes
-rw-r--r-- tests/r_qtl/test_r_qtl2_cross_information.py 35
4 files changed, 66 insertions, 8 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 682542b..02217ee 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -3,7 +3,7 @@ import io
 import csv
 import json
 from zipfile import ZipFile
-from functools import reduce
+from functools import reduce, partial
 from typing import Iterator, Iterable, Callable
 
 import yaml
@@ -134,6 +134,15 @@ def make_process_data_geno(cdata) -> tuple[
             for items in zip(ids, vals[1:]))
     return (__non_transposed__, __transposed__)
 
+def replace_cross_info(val, cdata: dict):
+    """
+    Replace cross information in files with the values in the control data.
+    """
+    cross_info = cdata.get("cross_info", False)
+    if bool(cross_info):
+        return cross_info.get(val, val)
+    return val
+
 def make_process_data_covar(cdata) -> tuple[
         Callable[[dict], dict],
         Callable[[str, tuple[str, ...], tuple[str, ...]],
@@ -144,14 +153,10 @@ def make_process_data_covar(cdata) -> tuple[
         if bool(sex_info):
             return sex_info.get(val, val)
         return val
-    def replace_cross_info(val):
-        cross_info = cdata.get("cross_info", False)
-        if bool(cross_info):
-            return cross_info.get(val, val)
-        return val
+    rep_cross_info = partial(replace_cross_info, cdata=cdata)
     def non_transposed(row: dict) -> dict:
         return {
-            key: thread_op(value, replace_sex_code, replace_cross_info)
+            key: thread_op(value, replace_sex_code, rep_cross_info)
             for key,value in row.items()
         }
     def transposed(id_key: str,
@@ -160,7 +165,7 @@ def make_process_data_covar(cdata) -> tuple[
         return tuple(
             dict(zip(
                 [id_key, vals[0]],
-                (thread_op(item, replace_sex_code, replace_cross_info)
+                (thread_op(item, replace_sex_code, rep_cross_info)
                  for item in items)))
             for items in zip(ids, vals[1:]))
     return (non_transposed, transposed)
@@ -198,3 +203,21 @@ def file_data(zfile: ZipFile,
     for row in with_transposed(
             zfile, member_key, cdata, process_transposed_value):
         yield row
+
+def cross_information(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
+    """Load cross information where present."""
+    cdata_cross_info = cdata.get("cross_info", {})
+    cross_info_file_key = "covar"
+    new_cdata = {**cdata}
+    sex_fields = (cdata.get("sex",{}).get("covar",""),)
+    if "file" in cdata_cross_info:
+        cross_info_file_key = "gnqc_cross_info_file"
+        new_cdata = {**cdata, "gnqc_cross_info_file": cdata_cross_info["file"]}
+
+    for row in file_data(zfile,
+                         cross_info_file_key,
+                         new_cdata,
+                         *make_process_data_covar(cdata)):
+        yield {
+            key: thread_op(value, partial(replace_cross_info, cdata=cdata))
+            for key, value in row.items() if key not in sex_fields}
diff --git a/tests/r_qtl/test_files/test_cross_info_01.zip b/tests/r_qtl/test_files/test_cross_info_01.zip
new file mode 100644
index 0000000..1211f96
--- /dev/null
+++ b/tests/r_qtl/test_files/test_cross_info_01.zip
Binary files differ
diff --git a/tests/r_qtl/test_files/test_cross_info_02.zip b/tests/r_qtl/test_files/test_cross_info_02.zip
new file mode 100644
index 0000000..dddd281
--- /dev/null
+++ b/tests/r_qtl/test_files/test_cross_info_02.zip
Binary files differ
diff --git a/tests/r_qtl/test_r_qtl2_cross_information.py b/tests/r_qtl/test_r_qtl2_cross_information.py
new file mode 100644
index 0000000..f424267
--- /dev/null
+++ b/tests/r_qtl/test_r_qtl2_cross_information.py
@@ -0,0 +1,35 @@
+"""Test loading of cross information."""
+
+from pathlib import Path
+
+import pytest
+from zipfile import ZipFile
+
+from r_qtl import r_qtl2 as rqtl2
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+    "filepath,expected",
+    (("tests/r_qtl/test_files/test_cross_info_01.zip",
+      ({"id": "1", "cross_direction": 1},
+       {"id": "2", "cross_direction": 1},
+       {"id": "3", "cross_direction": 1},
+       {"id": "71", "cross_direction": 0},
+       {"id": "72", "cross_direction": 0},
+       {"id": "146", "cross_direction": 1},
+       {"id": "147", "cross_direction": 1},
+       {"id": "148", "cross_direction": 1})),
+     ("tests/r_qtl/test_files/test_cross_info_02.zip",
+      ({"id": "1", "cross_direction": 1},
+       {"id": "2", "cross_direction": 1},
+       {"id": "3", "cross_direction": 1},
+       {"id": "71", "cross_direction": 0},
+       {"id": "72", "cross_direction": 0},
+       {"id": "146", "cross_direction": 1},
+       {"id": "147", "cross_direction": 1},
+       {"id": "148", "cross_direction": 1}))))
+def test_parse_cross_info(filepath, expected):
+    """Test parsing of cross information."""
+    with ZipFile(Path(filepath).absolute(), "r") as zfile:
+        assert tuple(rqtl2.cross_information(
+            zfile, rqtl2.control_data(zfile))) == expected