diff options
author | BonfaceKilz | 2021-03-24 16:18:00 +0300 |
---|---|---|
committer | BonfaceKilz | 2021-05-08 19:19:47 +0300 |
commit | 7b94f989bcfbf6543bfa628422331adfa3d5daac (patch) | |
tree | 3ab2e7eb86ceb8f47284b10c7ec92de4c3a10eb1 /gn3/computations/parsers.py | |
parent | c516eb05db17d75db9e202750989085cfdd1bd02 (diff) | |
download | genenetwork3-7b94f989bcfbf6543bfa628422331adfa3d5daac.tar.gz |
Add extra procedure for parsing a genotype file
* gn3/computations/parsers.py (parse_genofile): New procedure.
* tests/unit/computations/test_parsers.py: New test files for above.
Diffstat (limited to 'gn3/computations/parsers.py')
-rw-r--r-- | gn3/computations/parsers.py | 38 |
1 files changed, 38 insertions, 0 deletions
"""Parsers for generating some files in genenetwork"""
import os
from typing import Any, Dict, List, Tuple


def parse_genofile(file_path: str) -> Tuple[List[str],
                                            List[Dict[str, Any]]]:
    """Parse a whitespace-delimited genotype file.

    The file is read line by line:

    * Blank lines and lines starting with "#" or "@" are skipped.
    * A line starting with "Chr" is treated as the header; every column
      from the fifth onwards is taken as a strain name and lower-cased.
    * Any other line is a genotype row whose first four columns are the
      chromosome, locus, cM and Mb fields (kept as strings) and whose
      remaining columns are per-strain genotype codes.

    Genotype codes are matched case-insensitively: "b" -> -1, "d" -> 1,
    "h" -> 0, "u" -> None.  Any other code also becomes None.

    Args:
        file_path: Path of the genotype file to read.

    Returns:
        A ``(strains, genotypes)`` tuple.  ``strains`` is the list of
        lower-cased strain names from the most recent header line (empty
        if no header was seen).  ``genotypes`` holds one dict per row
        with the keys "chr", "locus", "cm", "mb", "values" and
        "dicvalues", the latter pairing each strain with its value.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        IndexError: If a genotype row has fewer than four columns.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(file_path)
    genotype_codes = {
        'b': -1,
        'd': 1,
        'h': 0,
        'u': None,
    }
    genotypes: List[Dict[str, Any]] = []
    strains: List[str] = []
    with open(file_path, "r") as _genofile:
        for line in _genofile:
            line = line.strip()
            # An empty line splits into no cells at all; indexing it below
            # would raise IndexError, so skip it with the comment and
            # metadata lines.
            if not line or line.startswith(("#", "@")):
                continue
            cells = line.split()
            # NOTE: the header is recognised purely by its "Chr" prefix, so
            # a row whose first column starts with "Chr" is read as a header.
            if line.startswith("Chr"):
                strains = [strain.lower() for strain in cells[4:]]
                continue
            values = [genotype_codes.get(value.lower())
                      for value in cells[4:]]
            genotypes.append({
                "chr": cells[0],
                "locus": cells[1],
                "cm": cells[2],
                "mb": cells[3],
                "values": values,
                # zip() stops at the shorter input, so surplus strains or
                # surplus values are dropped silently here.
                "dicvalues": dict(zip(strains, values)),
            })
    return strains, genotypes