From 6b2002f236188e3c2571af18642527b156ed60e0 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 4 Jan 2024 17:08:47 +0300 Subject: Parse cross information from R/qtl2 bundle. --- r_qtl/r_qtl2.py | 39 ++++++++++++++++++++------ tests/r_qtl/test_files/test_cross_info_01.zip | Bin 0 -> 597 bytes tests/r_qtl/test_files/test_cross_info_02.zip | Bin 0 -> 778 bytes tests/r_qtl/test_r_qtl2_cross_information.py | 35 +++++++++++++++++++++++ 4 files changed, 66 insertions(+), 8 deletions(-) create mode 100644 tests/r_qtl/test_files/test_cross_info_01.zip create mode 100644 tests/r_qtl/test_files/test_cross_info_02.zip create mode 100644 tests/r_qtl/test_r_qtl2_cross_information.py diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py index 682542b..02217ee 100644 --- a/r_qtl/r_qtl2.py +++ b/r_qtl/r_qtl2.py @@ -3,7 +3,7 @@ import io import csv import json from zipfile import ZipFile -from functools import reduce +from functools import reduce, partial from typing import Iterator, Iterable, Callable import yaml @@ -134,6 +134,15 @@ def make_process_data_geno(cdata) -> tuple[ for items in zip(ids, vals[1:])) return (__non_transposed__, __transposed__) +def replace_cross_info(val, cdata: dict): + """ + Replace cross information in files with the values in the control data. + """ + cross_info = cdata.get("cross_info", False) + if bool(cross_info): + return cross_info.get(val, val) + return val + def make_process_data_covar(cdata) -> tuple[ Callable[[dict], dict], Callable[[str, tuple[str, ...], tuple[str, ...]], @@ -144,14 +153,10 @@ def make_process_data_covar(cdata) -> tuple[ if bool(sex_info): return sex_info.get(val, val) return val - def replace_cross_info(val): - cross_info = cdata.get("cross_info", False) - if bool(cross_info): - return cross_info.get(val, val) - return val + rep_cross_info = partial(replace_cross_info, cdata=cdata) def non_transposed(row: dict) -> dict: return { - key: thread_op(value, replace_sex_code, replace_cross_info) + key: thread_op(value, replace_sex_code, rep_cross_info) for key,value in row.items() } def transposed(id_key: str, @@ -160,7 +165,7 @@ def make_process_data_covar(cdata) -> tuple[ return tuple( dict(zip( [id_key, vals[0]], - (thread_op(item, replace_sex_code, replace_cross_info) + (thread_op(item, replace_sex_code, rep_cross_info) for item in items))) for items in zip(ids, vals[1:])) return (non_transposed, transposed) @@ -198,3 +203,21 @@ def file_data(zfile: ZipFile, for row in with_transposed( zfile, member_key, cdata, process_transposed_value): yield row + +def cross_information(zfile: ZipFile, cdata: dict) -> Iterator[dict]: + """Load cross information where present.""" + cdata_cross_info = cdata.get("cross_info", {}) + cross_info_file_key = "covar" + new_cdata = {**cdata} + sex_fields = (cdata.get("sex",{}).get("covar",""),) + if "file" in cdata_cross_info: + cross_info_file_key = "gnqc_cross_info_file" + new_cdata = {**cdata, "gnqc_cross_info_file": cdata_cross_info["file"]} + + for row in file_data(zfile, + cross_info_file_key, + new_cdata, + *make_process_data_covar(cdata)): + yield { + key: thread_op(value, partial(replace_cross_info, cdata=cdata)) + for key, value in row.items() if key not in sex_fields} diff --git a/tests/r_qtl/test_files/test_cross_info_01.zip b/tests/r_qtl/test_files/test_cross_info_01.zip new file mode 100644 index 0000000..1211f96 Binary files /dev/null and b/tests/r_qtl/test_files/test_cross_info_01.zip differ diff --git a/tests/r_qtl/test_files/test_cross_info_02.zip b/tests/r_qtl/test_files/test_cross_info_02.zip new file mode 100644 index 0000000..dddd281 Binary files /dev/null and b/tests/r_qtl/test_files/test_cross_info_02.zip differ diff --git a/tests/r_qtl/test_r_qtl2_cross_information.py b/tests/r_qtl/test_r_qtl2_cross_information.py new file mode 100644 index 0000000..f424267 --- /dev/null +++ b/tests/r_qtl/test_r_qtl2_cross_information.py @@ -0,0 +1,35 @@ +"""Test loading of cross information.""" + +from pathlib import Path + +import pytest +from zipfile import ZipFile + +from r_qtl import r_qtl2 as rqtl2 + +@pytest.mark.unit_test +@pytest.mark.parametrize( + "filepath,expected", + (("tests/r_qtl/test_files/test_cross_info_01.zip", + ({"id": "1", "cross_direction": 1}, + {"id": "2", "cross_direction": 1}, + {"id": "3", "cross_direction": 1}, + {"id": "71", "cross_direction": 0}, + {"id": "72", "cross_direction": 0}, + {"id": "146", "cross_direction": 1}, + {"id": "147", "cross_direction": 1}, + {"id": "148", "cross_direction": 1})), + ("tests/r_qtl/test_files/test_cross_info_02.zip", + ({"id": "1", "cross_direction": 1}, + {"id": "2", "cross_direction": 1}, + {"id": "3", "cross_direction": 1}, + {"id": "71", "cross_direction": 0}, + {"id": "72", "cross_direction": 0}, + {"id": "146", "cross_direction": 1}, + {"id": "147", "cross_direction": 1}, + {"id": "148", "cross_direction": 1})))) +def test_parse_cross_info(filepath, expected): + """Test parsing of cross information.""" + with ZipFile(Path(filepath).absolute(), "r") as zfile: + assert tuple(rqtl2.cross_information( + zfile, rqtl2.control_data(zfile))) == expected -- cgit v1.2.3