aboutsummaryrefslogtreecommitdiff
path: root/r_qtl
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2023-12-25 11:20:11 +0300
committer: Frederick Muriuki Muriithi, 2023-12-25 11:20:11 +0300
commit: 69474e758208ccabe1ae0c4b62403c043a4cd7eb (patch)
tree: b60372064bf77e8331c88538df8316162351a681 /r_qtl
parent: 3723cc8fe3977f292e636e98278b73c88b2b9677 (diff)
download: gn-uploader-69474e758208ccabe1ae0c4b62403c043a4cd7eb.tar.gz
Read genetic map files
Diffstat (limited to 'r_qtl')
-rw-r--r--r_qtl/r_qtl2.py30
1 files changed, 29 insertions, 1 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 508d3eb..94fa842 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -8,7 +8,6 @@ from functools import reduce
from typing import Any, List, Union, Iterator
from zipfile import ZipFile, ZipInfo, is_zipfile
-from quality_control.debug import __pk__
from r_qtl.errors import InvalidFormat
def thread_op(value, *functions):
@@ -63,6 +62,35 @@ def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
in row.items()
}
def genetic_map_data(zfile: ZipFile, cdata: dict) -> dict:
    """Read the gmap file from an R/qtl2 bundle into a per-marker mapping.

    Lines beginning with ``#`` are treated as comments and ignored.  Fields
    are split on ``cdata["sep"]`` (``","`` when the key is absent).

    Two file layouts are supported, selected by ``cdata["gmap_transposed"]``
    (treated as ``False`` when absent):

    * Not transposed: the first line is a header that must include a
      ``marker`` column; every following line describes one marker.
    * Transposed: every line describes one column; its first field is the
      column name and the remaining fields are that column's value for each
      marker.  The first line holds the marker names.

    Args:
        zfile: The open zip bundle to read from.
        cdata: The control data; ``cdata["gmap"]`` names the zip member that
            holds the genetic map.

    Returns:
        A dict keyed by marker name whose values are dicts of the remaining
        columns, e.g. ``{"m1": {"chr": "1", "pos": "0.5"}}``.  All values are
        kept as the strings read from the file.

    Raises:
        KeyError: If ``cdata`` has no ``gmap`` entry, if the named member is
            not in the zip file, or if a non-transposed file has no
            ``marker`` column.
    """
    separator = cdata.get("sep", ",")
    with zfile.open(cdata["gmap"]) as gmapfile:
        content = (line for line in io.TextIOWrapper(gmapfile)
                   if not line.startswith("#"))

        # The layout of the gmap file is governed by its own
        # `gmap_transposed` flag, not by the genotype file's
        # `geno_transposed` flag.
        if not cdata.get("gmap_transposed", False):
            return {
                row["marker"]: {
                    key: value for key, value in row.items()
                    if key != "marker"
                }
                for row in csv.DictReader(content, delimiter=separator)
            }

        # Blank lines are skipped: a row without value fields would make the
        # `zip(*...)` transposition below truncate to nothing.
        rows = [[field.strip() for field in line.strip().split(separator)]
                for line in content if line.strip()]

    # First field of every row is a column name; the first row carries the
    # marker names, so its name is dropped when labelling the other columns.
    column_names = tuple(row[0] for row in rows)[1:]
    return {
        marker: dict(zip(column_names, values))
        for marker, *values in zip(*(row[1:] for row in rows))
    }
+
def read_r_qtl2_files(filepath: Path):
"""Read R/qtl2 format zip files."""
with ZipFile(filepath, "r") as zfile: