aboutsummaryrefslogtreecommitdiff
path: root/r_qtl
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2023-12-20 15:57:38 +0300
committerFrederick Muriuki Muriithi2023-12-20 15:57:38 +0300
commit3723cc8fe3977f292e636e98278b73c88b2b9677 (patch)
tree35f2588d902b5d78a85946de2e9ed45c1e73c847 /r_qtl
parent16fc2613bb7f3e5f3596f1470e0de2741bd55f5a (diff)
downloadgn-uploader-3723cc8fe3977f292e636e98278b73c88b2b9677.tar.gz
Read genotype files
Diffstat (limited to 'r_qtl')
-rw-r--r--r_qtl/r_qtl2.py40
1 files changed, 39 insertions, 1 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index 9c9d67b..508d3eb 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -1,13 +1,20 @@
"""The R/qtl2 parsing and processing code."""
+import io
+import csv
import json
import yaml
from pathlib import Path
-from typing import List, Union
+from functools import reduce
+from typing import Any, List, Union, Iterator
from zipfile import ZipFile, ZipInfo, is_zipfile
from quality_control.debug import __pk__
from r_qtl.errors import InvalidFormat
def thread_op(value, *functions):
    """Pass `value` through each of `functions`, left to right.

    Each function receives the result of the previous one; the result of the
    last function is returned.  With no functions, `value` comes back as-is.
    """
    result = value
    for func in functions:
        result = func(result)
    return result
+
def control_data(zfile: ZipFile) -> dict:
"""Retrieve the control file from the zip file info."""
files = tuple(filename
@@ -24,6 +31,37 @@ def control_data(zfile: ZipFile) -> dict:
if files[0].endswith(".json")
else yaml.safe_load(zfile.read(files[0])))
def genotype_metadata(zfile: ZipFile, cdata: dict) -> Union[dict, None]:
    """Read the individual ID key and the marker names from the genotype file.

    The header is the first line of the zip member named by `cdata["geno"]`
    that is neither blank nor a comment (a line starting with "#").  It is
    split on `cdata["sep"]`, falling back to "," when "sep" is not present.

    Returns a dict with:
      - "individual_id_key": the first header field
      - "markers": a tuple of the remaining header fields
    with every field stripped of surrounding whitespace.  Returns None when
    the file contains no header line at all.
    """
    # TODO: Handle transposed files
    with zfile.open(cdata["geno"]) as genofile:
        header = next(
            (line for line in io.TextIOWrapper(genofile)
             if line.strip() and not line.startswith("#")),
            None)

    if header is None:
        return None

    # Only drop the line terminator before splitting: stripping all whitespace
    # would swallow a leading/trailing empty field when the separator is
    # itself whitespace (e.g. a tab).
    id_key, *markers = header.rstrip("\r\n").split(cdata.get("sep", ","))
    return {
        "individual_id_key": id_key.strip(),
        "markers": tuple(marker.strip() for marker in markers)
    }
+
def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
    """Lazily load the genotype file, making use of the control data.

    Reads the zip member named by `cdata["geno"]` as delimited text (delimiter
    `cdata["sep"]`, "," if absent), skipping blank lines and lines that start
    with "#".  The first remaining line supplies the keys of each yielded row.

    Every value in a row is transformed in two steps:
      1. genotype codes are replaced using the `cdata["genotypes"]` mapping
         (values not in the mapping are kept; no mapping means no replacement)
      2. the result is replaced with None if it is one of `cdata["na.strings"]`

    "na.strings" may be a single string or a collection of strings; when it is
    absent, "NA" is used (the R/qtl2 control-file default).

    This is a generator: `zfile` must remain open until iteration finishes.
    """
    # TODO: Handle transposed files
    # NOTE(review): the transformation is applied to every column, including
    # the individual-ID column -- confirm that is intended.
    genotype_codes = cdata.get("genotypes") or {}

    na_strings = cdata.get("na.strings")
    if na_strings is None:
        na_strings = ("NA",)
    elif isinstance(na_strings, str):
        # A bare string would turn `in` into a substring test ("A" in "NA")
        # and raise TypeError for non-string genotype codes.
        na_strings = (na_strings,)
    else:
        na_strings = tuple(na_strings)

    def decode(value):
        # replace genotype codes, then replace N/A strings
        decoded = genotype_codes.get(value, value)
        return None if decoded in na_strings else decoded

    with zfile.open(cdata["geno"]) as genofile:
        reader = csv.DictReader(
            (line for line in io.TextIOWrapper(genofile)
             if line.strip() and not line.startswith("#")),
            delimiter=cdata.get("sep", ","))
        for row in reader:
            yield {key: decode(value) for key, value in row.items()}
def read_r_qtl2_files(filepath: Path):
"""Read R/qtl2 format zip files."""