aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Frederick Muriuki Muriithi 2024-01-04 17:35:48 +0300
committer Frederick Muriuki Muriithi 2024-01-04 17:35:48 +0300
commit 5f5cc8503ec10319ee4d9e7d6bd739cc6190561c (patch)
tree 8d92b71cd7e4870693f696e97d5dad82651c8a76
parent 6b2002f236188e3c2571af18642527b156ed60e0 (diff)
download gn-uploader-5f5cc8503ec10319ee4d9e7d6bd739cc6190561c.tar.gz
Parse sex information from R/qtl bundle.
-rw-r--r-- r_qtl/r_qtl2.py 35
-rw-r--r-- tests/r_qtl/test_files/test_cross_info_01.zip bin 597 -> 594 bytes
-rw-r--r-- tests/r_qtl/test_files/test_cross_info_02.zip bin 778 -> 775 bytes
-rw-r--r-- tests/r_qtl/test_files/test_sex_info_01.zip bin 0 -> 589 bytes
-rw-r--r-- tests/r_qtl/test_files/test_sex_info_02.zip bin 0 -> 773 bytes
-rw-r--r-- tests/r_qtl/test_r_qtl2_sex_information.py 35
6 files changed, 63 insertions, 7 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 02217ee..2c1e162 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -134,6 +134,13 @@ def make_process_data_geno(cdata) -> tuple[
for items in zip(ids, vals[1:]))
return (__non_transposed__, __transposed__)
+def replace_sex_info(val, cdata: dict):
+ """Replace `val` using the control data's "sex" mapping; return `val` unchanged if unmapped."""
+ sex_info = cdata.get("sex", False)
+ if bool(sex_info):
+ return sex_info.get(val, val)
+ return val
+
def replace_cross_info(val, cdata: dict):
"""
Replace cross information in files with the values in the control data.
@@ -148,15 +155,11 @@ def make_process_data_covar(cdata) -> tuple[
Callable[[str, tuple[str, ...], tuple[str, ...]],
tuple[dict, ...]]]:
"""Build functions to process sex and cross information in covar files."""
- def replace_sex_code(val):
- sex_info = cdata.get("sex", False)
- if bool(sex_info):
- return sex_info.get(val, val)
- return val
+ rep_sex_info = partial(replace_sex_info, cdata=cdata)
rep_cross_info = partial(replace_cross_info, cdata=cdata)
def non_transposed(row: dict) -> dict:
return {
- key: thread_op(value, replace_sex_code, rep_cross_info)
+ key: thread_op(value, rep_sex_info, rep_cross_info)
for key,value in row.items()
}
def transposed(id_key: str,
@@ -165,7 +168,7 @@ def make_process_data_covar(cdata) -> tuple[
return tuple(
dict(zip(
[id_key, vals[0]],
- (thread_op(item, replace_sex_code, rep_cross_info)
+ (thread_op(item, rep_sex_info, rep_cross_info)
for item in items)))
for items in zip(ids, vals[1:]))
return (non_transposed, transposed)
@@ -221,3 +224,21 @@ def cross_information(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
yield {
key: thread_op(value, partial(replace_cross_info, cdata=cdata))
for key, value in row.items() if key not in sex_fields}
+
+def sex_information(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
+ """Load sex information where present, dropping the cross-info column."""
+ cdata_sex_info = cdata.get("sex", {})
+ sex_info_file_key = "covar"
+ new_cdata = {**cdata}
+ ci_fields = (cdata.get("cross_info",{}).get("covar",""),)
+ if "file" in cdata_sex_info:
+ sex_info_file_key = "gnqc_sex_info_file"
+ new_cdata = {**cdata, "gnqc_sex_info_file": cdata_sex_info["file"]}
+
+ for row in file_data(zfile,
+ sex_info_file_key,
+ new_cdata,
+ *make_process_data_covar(cdata)):
+ yield {
+ key: thread_op(value, partial(replace_sex_info, cdata=cdata))
+ for key, value in row.items() if key not in ci_fields}
diff --git a/tests/r_qtl/test_files/test_cross_info_01.zip b/tests/r_qtl/test_files/test_cross_info_01.zip
index 1211f96..6143a9f 100644
--- a/tests/r_qtl/test_files/test_cross_info_01.zip
+++ b/tests/r_qtl/test_files/test_cross_info_01.zip
Binary files differ
diff --git a/tests/r_qtl/test_files/test_cross_info_02.zip b/tests/r_qtl/test_files/test_cross_info_02.zip
index dddd281..7e23fc4 100644
--- a/tests/r_qtl/test_files/test_cross_info_02.zip
+++ b/tests/r_qtl/test_files/test_cross_info_02.zip
Binary files differ
diff --git a/tests/r_qtl/test_files/test_sex_info_01.zip b/tests/r_qtl/test_files/test_sex_info_01.zip
new file mode 100644
index 0000000..bc60d84
--- /dev/null
+++ b/tests/r_qtl/test_files/test_sex_info_01.zip
Binary files differ
diff --git a/tests/r_qtl/test_files/test_sex_info_02.zip b/tests/r_qtl/test_files/test_sex_info_02.zip
new file mode 100644
index 0000000..c862b73
--- /dev/null
+++ b/tests/r_qtl/test_files/test_sex_info_02.zip
Binary files differ
diff --git a/tests/r_qtl/test_r_qtl2_sex_information.py b/tests/r_qtl/test_r_qtl2_sex_information.py
new file mode 100644
index 0000000..90eae8d
--- /dev/null
+++ b/tests/r_qtl/test_r_qtl2_sex_information.py
@@ -0,0 +1,35 @@
+"""Test loading of sex information."""
+
+from pathlib import Path
+
+import pytest
+from zipfile import ZipFile
+
+from r_qtl import r_qtl2 as rqtl2
+
+@pytest.mark.unit_test
+@pytest.mark.parametrize(
+ "filepath,expected",
+ (("tests/r_qtl/test_files/test_sex_info_01.zip",
+ ({"id": "1", "sex": "male"},
+ {"id": "2", "sex": "male"},
+ {"id": "3", "sex": "male"},
+ {"id": "71", "sex": "male"},
+ {"id": "72", "sex": "male"},
+ {"id": "146", "sex": "female"},
+ {"id": "147", "sex": "female"},
+ {"id": "148", "sex": "female"})),
+ ("tests/r_qtl/test_files/test_sex_info_02.zip",
+ ({"id": "1", "sex": "male"},
+ {"id": "2", "sex": "male"},
+ {"id": "3", "sex": "male"},
+ {"id": "71", "sex": "male"},
+ {"id": "72", "sex": "male"},
+ {"id": "146", "sex": "female"},
+ {"id": "147", "sex": "female"},
+ {"id": "148", "sex": "female"}))))
+def test_parse_sex_info(filepath, expected):
+ """Test that `sex_information` yields the expected rows for each zipped bundle."""
+ with ZipFile(Path(filepath).absolute(), "r") as zfile:
+ assert tuple(rqtl2.sex_information(
+ zfile, rqtl2.control_data(zfile))) == expected