about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gn3/computations/parsers.py 38
-rw-r--r-- tests/unit/computations/test_parsers.py 54
2 files changed, 92 insertions, 0 deletions
diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
new file mode 100644
index 0000000..94387ff
--- /dev/null
+++ b/gn3/computations/parsers.py
@@ -0,0 +1,38 @@
+"""Parsers for generating some files in genenetwork"""
+import os
+from typing import Any, Dict, List, Tuple
+
+
+def parse_genofile(file_path: str) -> Tuple[List[str],
+                                            List[Dict[str, Any]]]:
+    """Parse a genotype file into strain names and marker genotypes.
+
+    Blank lines and lines starting with "#" or "@" are skipped.  The line
+    starting with "Chr" is the header; its cells from the fifth onwards
+    are the lower-cased strain names.  Every other line is a marker row:
+    its first four cells become "chr", "locus", "cm" and "mb", and the
+    rest are coded b -> -1, d -> 1, h -> 0, anything else -> None.
+    Raises FileNotFoundError if file_path does not exist.
+    """
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"Genotype file not found: {file_path}")
+    genotype_codes = {'b': -1, 'd': 1, 'h': 0, 'u': None}
+    genotypes, strains = [], []
+    with open(file_path, "r") as _genofile:
+        for line in _genofile:
+            line = line.strip()
+            # A blank line has no cells; indexing it would raise IndexError.
+            if not line or line.startswith(("#", "@")):
+                continue
+            cells = line.split()
+            if line.startswith("Chr"):
+                strains = [strain.lower() for strain in cells[4:]]
+                continue
+            values = [genotype_codes.get(cell.lower()) for cell in cells[4:]]
+            genotypes.append({
+                "chr": cells[0], "locus": cells[1],
+                "cm": cells[2], "mb": cells[3],
+                "values": values,
+                "dicvalues": dict(zip(strains, values)),
+            })
+    return strains, genotypes
diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py
new file mode 100644
index 0000000..19c3067
--- /dev/null
+++ b/tests/unit/computations/test_parsers.py
@@ -0,0 +1,54 @@
+"""Test cases for procedures defined in computations.parsers"""
+import unittest
+import os
+
+from gn3.computations.parsers import parse_genofile
+
+
+class TestParsers(unittest.TestCase):
+    """Unit tests for the parsers in gn3.computations.parsers"""
+
+    def test_parse_genofile_without_existing_file(self):
+        """Parsing a path that does not exist raises FileNotFoundError"""
+        with self.assertRaises(FileNotFoundError):
+            parse_genofile("/non-existent-file")
+
+    def test_parse_genofile_with_existing_file(self):
+        """The sample genotype file parses to the expected strains and rows"""
+        # Locate the fixture relative to this test module.
+        fixture = os.path.abspath(os.path.join(
+            os.path.dirname(__file__), "../test_data/genotype.txt"))
+        expected_strains = ["bxd1", "bxd2"]
+        expected_genotypes = [
+            {"chr": "1",
+             "locus": "rs31443144", "cm": "1.50", "mb": "3.010274",
+             "values": [-1, -1],
+             "dicvalues": {"bxd1": -1, "bxd2": -1}},
+            {"chr": "2",
+             "locus": "rs27644551", "cm": "93.26", "mb": "173.542999",
+             "values": [1, 1],
+             "dicvalues": {"bxd1": 1, "bxd2": 1}},
+            {"chr": "3",
+             "locus": "rs31187985", "cm": "17.12", "mb": "41.921845",
+             "values": [1, 1],
+             "dicvalues": {"bxd1": 1, "bxd2": 1}},
+            {"chr": "4",
+             "locus": "rs30254612", "cm": "2.15", "mb": "3.718812",
+             "values": [-1, 1],
+             "dicvalues": {"bxd1": -1, "bxd2": 1}},
+            {"chr": "5",
+             "locus": "UNCHS047057", "cm": "3.10", "mb": "4.199559",
+             "values": [-1, -1],
+             "dicvalues": {"bxd1": -1, "bxd2": -1}},
+            {"chr": "X",
+             "locus": "ChrXp_no_data", "cm": "1.40", "mb": "3.231738",
+             "values": [1, -1],
+             "dicvalues": {"bxd1": 1, "bxd2": -1}},
+            {"chr": "X",
+             "locus": "Affy_17539964", "cm": "1.40", "mb": "7.947581",
+             "values": [1, -1],
+             "dicvalues": {"bxd1": 1, "bxd2": -1}},
+        ]
+        self.assertEqual(
+            parse_genofile(fixture),
+            (expected_strains, expected_genotypes))