about | summary | refs | log | tree | commit | diff
path: root/r_qtl/r_qtl2.py
diff options
context:
space:
mode:
Diffstat (limited to 'r_qtl/r_qtl2.py')
-rw-r--r-- r_qtl/r_qtl2.py 50
1 files changed, 34 insertions, 16 deletions
diff --git a/r_qtl/r_qtl2.py b/r_qtl/r_qtl2.py
index e019d99..47f101e 100644
--- a/r_qtl/r_qtl2.py
+++ b/r_qtl/r_qtl2.py
@@ -32,7 +32,6 @@ def control_data(zfile: ZipFile) -> dict:
def genotype_metadata(zfile: ZipFile, cdata: dict) -> dict:
"""Read Individual ID key and the marker names."""
- # TODO: Handle transposed files
line_num = 0
with zfile.open(cdata["geno"]) as genofile:
for line in filter(lambda line: not line.startswith("#"),
@@ -45,25 +44,48 @@ def genotype_metadata(zfile: ZipFile, cdata: dict) -> dict:
def genotype_data(zfile: ZipFile, cdata: dict) -> Iterator[dict]:
"""Load the genotype file, making use of the control data."""
- # TODO: Handle transposed files
- with zfile.open(cdata["geno"]) as genofile:
- reader = csv.DictReader(filter(lambda line: not line.startswith("#"),
- io.TextIOWrapper(genofile)),
- delimiter=cdata.get("sep", ","))
- if not cdata.get("geno_transposed", False):
+ def replace_genotype_codes(val):
+ return cdata["genotypes"].get(val, val)
+
+ def replace_na_strings(val):
+ return (None if val in cdata["na.strings"] else val)
+
+ if not cdata.get("geno_transposed", False):
+ with zfile.open(cdata["geno"]) as genofile:
+ reader = csv.DictReader(
+ filter(lambda line: not line.startswith("#"),
+ io.TextIOWrapper(genofile)),
+ delimiter=cdata.get("sep", ","))
for row in reader:
yield {
key: thread_op(
value,
- # replace genotype codes
- lambda val: cdata["genotypes"].get(val, val),
- # replace N/A strings
- lambda val: (
- None if val in cdata["na.strings"] else val))
+ replace_genotype_codes,
+ replace_na_strings)
for key,value
in row.items()
}
+ def __merge__(key, samples, line):
+ marker = line[0]
+ return (
+ dict(zip(
+ [key, marker],
+ (thread_op(item, replace_genotype_codes, replace_na_strings)
+ for item in items)))
+ for items in zip(samples, line[1:]))
+
+ if cdata.get("geno_transposed", False):
+ with zfile.open(cdata["geno"]) as genofile:
+ lines = (line.strip().split(cdata.get("sep", ","))
+ for line in filter(lambda line: not line.startswith("#"),
+ io.TextIOWrapper(genofile)))
+ id_line = next(lines)
+ id_key, samples = id_line[0], id_line[1:]
+ for line in lines:
+ for row in __merge__(id_key, samples, line):
+ yield row
+
def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> dict:
"""Read gmap files to get the genome mapping data"""
assert map_type in ("genetic-map", "physical-map"), "Invalid map type"
@@ -71,10 +93,6 @@ def map_data(zfile: ZipFile, map_type: str, cdata: dict) -> dict:
"genetic-map": "gmap",
"physical-map": "pmap"
}[map_type]]
- # TODO: Might need to check `gmap_transposed` and `pmap_transposed` instead
- # of `geno_transposed` -- see
- # https://github.com/rqtl/qtl2data/blob/main/ArabMAGIC/arabmagic_tair8.json
- # for the *_transposed values
transposed_dict = {
"genetic-map": "gmap_transposed",
"physical-map": "pmap_transposed"