aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Muriithi Frederick Muriuki 2021-09-01 07:35:40 +0300
committer Muriithi Frederick Muriuki 2021-09-01 07:35:40 +0300
commit b975e0cfd1d0adc5f51e66292d29d4651d3f053f (patch)
tree 9a995096f469c897b3ccb24b48c380cf27d0d4b3
parent 221c773daea839ecf0e50c196484bb91e3a6db33 (diff)
download genenetwork3-b975e0cfd1d0adc5f51e66292d29d4651d3f053f.tar.gz
Parse the genotype file's data header
* gn3/db/genotypes.py: parse the data header. * tests/unit/db/test_genotypes.py: check that the header is parsed correctly. Add tests to check that the parser works as expected. Add code to implement the parsing and pass the tests.
-rw-r--r-- gn3/db/genotypes.py 19
-rw-r--r-- tests/unit/db/test_genotypes.py 22
2 files changed, 40 insertions, 1 deletions
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 2be3e1a..be0dfc2 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -87,3 +87,22 @@ def parse_genotype_labels(lines: list):
return tuple(
item for item in (__parse_label(line) for line in lines)
if item is not None)
+
def parse_genotype_header(line: str, parlist=tuple()):
    """
    Parse the genotype file header line

    DESCRIPTION:
    Reworks
    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114

    PARAMETERS:
    line: the tab-separated header line. Each column name is stripped of
        surrounding whitespace (including a trailing newline).
    parlist: labels placed ahead of the data-column labels in the "prgy"
        value. Any iterable is accepted; it is converted to a tuple.

    RETURNS:
    A tuple of (key, value) pairs, in this order:
        "Mbmap": whether the header contains an "Mb" column,
        "cm_column": index of the "cM" column,
        "mb_column": index of the "Mb" column, or None when absent,
        "prgy": `parlist` followed by the labels of the data columns,
        "nprgy": number of labels in "prgy".

    RAISES:
    ValueError: if the header has no "cM" column.
    """
    items = [item.strip() for item in line.split("\t")]
    has_mb = "Mb" in items
    try:
        cm_column = items.index("cM")
    except ValueError:
        raise ValueError(
            "genotype header has no 'cM' column: {!r}".format(line)
        ) from None
    # The data columns are taken to start at index 4 when an "Mb" column is
    # present and at index 3 otherwise (same fixed offsets as before).
    first_data_column = 4 if has_mb else 3
    # `tuple(parlist)` lets callers pass a list (or any iterable) without
    # hitting a TypeError on the concatenation.
    prgy = tuple(parlist) + tuple(items[first_data_column:])
    return (
        ("Mbmap", has_mb),
        ("cm_column", cm_column),
        ("mb_column", items.index("Mb") if has_mb else None),
        ("prgy", prgy),
        ("nprgy", len(prgy)))
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
index 0264764..4fa8a53 100644
--- a/tests/unit/db/test_genotypes.py
+++ b/tests/unit/db/test_genotypes.py
@@ -1,6 +1,6 @@
"""Tests gn3.db.genotypes"""
from unittest import TestCase
-from gn3.db.genotypes import parse_genotype_labels
+from gn3.db.genotypes import parse_genotype_labels, parse_genotype_header
class TestGenotypes(TestCase):
"""Tests for functions in `gn3.db.genotypes`."""
@@ -15,3 +15,23 @@ class TestGenotypes(TestCase):
(("group", "test_group"), ("filler", "test_filler"),
("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
("het", "test_het"), ("unk", "test_unk")))
+
def test_parse_genotype_header(self):
    """Check header parsing for lines with and without an `Mb` column."""
    strains_with_mb = (
        "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
        "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18",
        "BXD19")
    strains_without_mb = (
        "BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
        "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18")
    cases = (
        # Header carrying both the cM and the Mb columns.
        ("\t".join(("Chr", "Locus", "cM", "Mb") + strains_with_mb),
         (("Mbmap", True), ("cm_column", 2), ("mb_column", 3),
          ("prgy", strains_with_mb),
          ("nprgy", 14))),
        # Header carrying only the cM column.
        ("\t".join(("Chr", "Locus", "cM") + strains_without_mb),
         (("Mbmap", False), ("cm_column", 2), ("mb_column", None),
          ("prgy", strains_without_mb),
          ("nprgy", 13))))
    for header, expected in cases:
        with self.subTest(header=header):
            self.assertEqual(parse_genotype_header(header), expected)