aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations
diff options
context:
space:
mode:
author: BonfaceKilz, 2021-03-24 16:18:00 +0300
committer: BonfaceKilz, 2021-05-08 19:19:47 +0300
commit: 7b94f989bcfbf6543bfa628422331adfa3d5daac (patch)
tree: 3ab2e7eb86ceb8f47284b10c7ec92de4c3a10eb1 /gn3/computations
parent: c516eb05db17d75db9e202750989085cfdd1bd02 (diff)
download: genenetwork3-7b94f989bcfbf6543bfa628422331adfa3d5daac.tar.gz
Add extra procedure for parsing a genotype file
* gn3/computations/parsers.py (parse_genofile): New procedure. * tests/unit/computations/test_parsers.py: New test files for above.
Diffstat (limited to 'gn3/computations')
-rw-r--r--gn3/computations/parsers.py38
1 files changed, 38 insertions, 0 deletions
diff --git a/gn3/computations/parsers.py b/gn3/computations/parsers.py
new file mode 100644
index 0000000..94387ff
--- /dev/null
+++ b/gn3/computations/parsers.py
@@ -0,0 +1,38 @@
+"""Parsers for generating some files in genenetwork"""
+import os
+from typing import Any, Dict, List, Tuple
+
+
def parse_genofile(file_path: str) -> Tuple[List[str],
                                            List[Dict[str, Any]]]:
    """Parse a genotype file into strain names and per-marker records.

    The file is read line by line after stripping surrounding whitespace:

    * blank lines and lines starting with ``#`` or ``@`` are skipped;
    * the line starting with ``Chr`` is the header; every column after the
      fourth is taken as a strain name and lower-cased;
    * every other line is a marker row whose first four whitespace-separated
      columns are stored (as strings) under ``chr``, ``locus``, ``cm`` and
      ``mb``, and whose remaining columns are genotype calls.

    Genotype calls are matched case-insensitively: ``b`` -> -1, ``d`` -> 1,
    ``h`` -> 0, and ``u`` or any unrecognised call -> ``None``.

    Args:
        file_path: Path of the genotype file to read.

    Returns:
        A ``(strains, genotypes)`` tuple. ``strains`` is the list of
        lower-cased strain names from the header (empty if no header line
        was seen). ``genotypes`` holds one dict per marker row with the keys
        ``chr``, ``locus``, ``cm``, ``mb``, ``values`` (list of encoded
        calls) and ``dicvalues`` (strain name -> encoded call, paired
        positionally with the header seen so far).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        IndexError: If a marker row has fewer than four columns.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"Genotype file not found: {file_path}")
    allele_codes = {
        'b': -1,
        'd': 1,
        'h': 0,
        'u': None,
    }
    genotypes: List[Dict[str, Any]] = []
    strains: List[str] = []
    with open(file_path, "r") as _genofile:
        for line in _genofile:
            line = line.strip()
            # Blank lines carry no data; without this guard the column
            # indexing below would fail with IndexError on an empty list.
            if not line or line.startswith(("#", "@")):
                continue
            cells = line.split()
            if line.startswith("Chr"):
                strains = [strain.lower() for strain in cells[4:]]
                continue
            # dict.get already yields None for calls that are not in the map.
            values = [allele_codes.get(value.lower()) for value in cells[4:]]
            genotypes.append({
                "chr": cells[0],
                "locus": cells[1],
                "cm": cells[2],
                "mb": cells[3],
                "values": values,
                "dicvalues": dict(zip(strains, values)),
            })
    return strains, genotypes