aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBonfaceKilz2021-03-24 16:18:00 +0300
committerBonfaceKilz2021-05-08 19:19:47 +0300
commit7b94f989bcfbf6543bfa628422331adfa3d5daac (patch)
tree3ab2e7eb86ceb8f47284b10c7ec92de4c3a10eb1
parentc516eb05db17d75db9e202750989085cfdd1bd02 (diff)
downloadgenenetwork3-7b94f989bcfbf6543bfa628422331adfa3d5daac.tar.gz
Add extra procedure for parsing a genotype file
* gn3/computations/parsers.py (parse_genofile): New procedure. * tests/unit/computations/test_parsers.py: New test file for the above.
-rw-r--r--gn3/computations/parsers.py38
-rw-r--r--tests/unit/computations/test_parsers.py54
2 files changed, 92 insertions, 0 deletions
diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
new file mode 100644
index 0000000..94387ff
--- /dev/null
+++ b/gn3/computations/parsers.py
@@ -0,0 +1,38 @@
+"""Parsers for generating some files in genenetwork"""
+import os
+from typing import Any, Dict, List, Tuple
+
+
def parse_genofile(file_path: str) -> Tuple[List[str], List[Dict[str, Any]]]:
    """Parse a genotype file into strain names and per-marker records.

    The file is read line by line (each line is stripped first):

    * blank lines and lines starting with ``#`` or ``@`` are skipped;
    * a line starting with ``Chr`` is treated as the header row: every
      whitespace-separated cell after the fourth is a strain name, stored
      lower-cased;
    * any other line is a marker row whose first four cells are stored
      verbatim (as strings) under ``chr``, ``locus``, ``cm`` and ``mb``,
      and whose remaining cells are genotype calls.

    Genotype calls are matched case-insensitively: ``b`` -> -1, ``d`` -> 1,
    ``h`` -> 0, ``u`` -> None.  Any other symbol also maps to None.

    Args:
        file_path: Path of the genotype file to read.

    Returns:
        A ``(strains, genotypes)`` tuple.  ``strains`` is the list of
        lower-cased strain names from the header row.  ``genotypes`` holds
        one dict per marker row with the keys ``chr``, ``locus``, ``cm``,
        ``mb``, ``values`` (the encoded calls, in column order) and
        ``dicvalues`` (the same calls keyed by strain name).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        IndexError: If a marker row has fewer than four cells.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Genotype file not found: {file_path}")

    allele_codes = {
        'b': -1,
        'd': 1,
        'h': 0,
        'u': None,
    }
    strains: List[str] = []
    genotypes: List[Dict[str, Any]] = []
    with open(file_path, "r") as _genofile:
        for line in _genofile:
            line = line.strip()
            # Blank lines carry no data; without this guard the cell
            # indexing below would raise IndexError on an empty list.
            if not line or line.startswith(("#", "@")):
                continue
            cells = line.split()
            if line.startswith("Chr"):
                strains = [strain.lower() for strain in cells[4:]]
                continue
            values = [allele_codes.get(value.lower()) for value in cells[4:]]
            genotypes.append({
                "chr": cells[0],
                "locus": cells[1],
                "cm": cells[2],
                "mb": cells[3],
                "values": values,
                # Pairs calls with the strains seen so far; rows that
                # precede the header row therefore get an empty mapping.
                "dicvalues": dict(zip(strains, values)),
            })
    return strains, genotypes
diff --git a/tests/unit/computations/test_parsers.py b/tests/unit/computations/test_parsers.py
new file mode 100644
index 0000000..19c3067
--- /dev/null
+++ b/tests/unit/computations/test_parsers.py
@@ -0,0 +1,54 @@
+"""Test cases for procedures defined in computations.parsers"""
+import unittest
+import os
+
+from gn3.computations.parsers import parse_genofile
+
+
class TestParsers(unittest.TestCase):
    """Unit tests for the parsers in gn3.computations.parsers"""

    def test_parse_genofile_without_existing_file(self):
        """A missing genotype file must raise FileNotFoundError"""
        with self.assertRaises(FileNotFoundError):
            parse_genofile("/non-existent-file")

    def test_parse_genofile_with_existing_file(self):
        """The bundled sample genotype file parses to the expected data"""
        expected_strains = ["bxd1", "bxd2"]
        # One row per marker: (chr, locus, cm, mb, encoded calls).
        marker_rows = [
            ("1", "rs31443144", "1.50", "3.010274", [-1, -1]),
            ("2", "rs27644551", "93.26", "173.542999", [1, 1]),
            ("3", "rs31187985", "17.12", "41.921845", [1, 1]),
            ("4", "rs30254612", "2.15", "3.718812", [-1, 1]),
            ("5", "UNCHS047057", "3.10", "4.199559", [-1, -1]),
            ("X", "ChrXp_no_data", "1.40", "3.231738", [1, -1]),
            ("X", "Affy_17539964", "1.40", "7.947581", [1, -1]),
        ]
        expected_genotypes = []
        for chrom, locus, centimorgan, megabase, calls in marker_rows:
            expected_genotypes.append({
                "chr": chrom,
                "locus": locus,
                "cm": centimorgan,
                "mb": megabase,
                "values": calls,
                "dicvalues": dict(zip(expected_strains, calls)),
            })

        # The fixture lives next to the unit-test package, in test_data/.
        this_dir = os.path.dirname(__file__)
        fixture_path = os.path.abspath(
            os.path.join(this_dir, "../test_data/genotype.txt"))

        parsed = parse_genofile(fixture_path)
        self.assertEqual(parsed, (expected_strains, expected_genotypes))