about summary refs log tree commit diff
path: root/gn3
diff options
context:
space:
mode:
author: Muriithi Frederick Muriuki, 2021-09-01 07:35:40 +0300
committer: Muriithi Frederick Muriuki, 2021-09-01 07:35:40 +0300
commit: b975e0cfd1d0adc5f51e66292d29d4651d3f053f (patch)
tree: 9a995096f469c897b3ccb24b48c380cf27d0d4b3 /gn3
parent: 221c773daea839ecf0e50c196484bb91e3a6db33 (diff)
download: genenetwork3-b975e0cfd1d0adc5f51e66292d29d4651d3f053f.tar.gz
Parse the genotype file's data header
* gn3/db/genotypes.py: parse data header
* tests/unit/db/test_genotypes.py: check that header's parse works correctly.

  Add tests to check that the parser works as expected. Add code to implement
  the parsing and pass the tests.
Diffstat (limited to 'gn3')
-rw-r--r-- gn3/db/genotypes.py 19
1 files changed, 19 insertions, 0 deletions
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 2be3e1a..be0dfc2 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -87,3 +87,22 @@ def parse_genotype_labels(lines: list):
     return tuple(
         item for item in (__parse_label(line) for line in lines)
         if item is not None)
+
+def parse_genotype_header(line: str, parlist=tuple()):
+    """
+    Parse the tab-separated header line of a genotype file.
+
+    Returns (key, value) pairs: Mbmap, cm_column, mb_column, prgy, nprgy.
+    `prgy` is `parlist` (any iterable) followed by the columns from index 4
+    when an "Mb" column exists, else from index 3. Raises ValueError if the
+    header has no "cM" column. Reworks
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114
+    """
+    items = [item.strip() for item in line.split("\t")]
+    has_mb = "Mb" in items
+    prgy = tuple(parlist) + tuple(items[4:] if has_mb else items[3:])
+    return (
+        ("Mbmap", has_mb),
+        ("cm_column", items.index("cM")),
+        ("mb_column", items.index("Mb") if has_mb else None),
+        ("prgy", prgy),
+        ("nprgy", len(prgy)))