diff options
| -rw-r--r-- | gn3/computations/parsers.py | 38 | ||||
| -rw-r--r-- | tests/unit/computations/test_parsers.py | 54 | 
2 files changed, 92 insertions, 0 deletions
# ---------------------------------------------------------------------------
# gn3/computations/parsers.py
# ---------------------------------------------------------------------------
"""Parsers for generating some files in genenetwork"""
import os
import tempfile
import unittest
from typing import Any, Dict, List, Optional, Tuple

# Numeric encoding of the single-letter genotype codes found in a genotype
# file.  Lookups are done on the lower-cased cell, so "B" and "b" are
# equivalent.  Codes that are not listed here are reported as ``None``.
_GENOTYPE_CODES: Dict[str, Optional[int]] = {
    "b": -1,
    "d": 1,
    "h": 0,
    "u": None,
}


def parse_genofile(file_path: str) -> Tuple[List[str], List[Dict[str, Any]]]:
    """Parse a whitespace-separated genotype file.

    The file is read line by line:

    * blank lines and lines starting with ``#`` or ``@`` are ignored;
    * a line starting with ``Chr`` is the header row; its cells from the
      fifth one onwards are taken as the strain names (lower-cased);
    * every other line is a marker row whose first four cells are the
      chromosome, locus, cM and Mb columns, and whose remaining cells are
      the per-strain genotype codes.

    Args:
        file_path: Path of the genotype file to read.

    Returns:
        A ``(strains, genotypes)`` tuple.  ``strains`` is the list of
        lower-cased strain names from the header row (empty when the file
        has no header).  ``genotypes`` holds one dict per marker row with
        the keys ``chr``, ``locus``, ``cm``, ``mb`` (all kept as strings),
        ``values`` (the codes translated with ``b -> -1``, ``d -> 1``,
        ``h -> 0``, anything else ``None``) and ``dicvalues`` (the same
        values keyed by strain name, using the header seen so far).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        IndexError: If a marker row has fewer than four columns.
    """
    strains: List[str] = []
    genotypes: List[Dict[str, Any]] = []
    # Let open() report a missing file itself: it raises FileNotFoundError
    # with the offending path attached and avoids a check-then-open race.
    with open(file_path, "r", encoding="utf-8") as genofile:
        for raw_line in genofile:
            line = raw_line.strip()
            # Blank lines used to fall through to the marker-row branch and
            # crash on ``cells[0]``; treat them like comments instead.
            if not line or line.startswith(("#", "@")):
                continue
            cells = line.split()
            if line.startswith("Chr"):
                strains = [strain.lower() for strain in cells[4:]]
                continue
            values = [_GENOTYPE_CODES.get(cell.lower()) for cell in cells[4:]]
            genotypes.append({
                "chr": cells[0],
                "locus": cells[1],
                "cm": cells[2],
                "mb": cells[3],
                "values": values,
                # zip() stops at the shorter sequence, so surplus values (or
                # surplus strains) are silently left out of this mapping.
                "dicvalues": dict(zip(strains, values)),
            })
    return strains, genotypes


# ---------------------------------------------------------------------------
# tests/unit/computations/test_parsers.py
#
# Test cases for procedures defined in computations.parsers.
# In the repository this module lives in its own file and starts with:
#     from gn3.computations.parsers import parse_genofile
# ---------------------------------------------------------------------------

# Minimal genotype file used by the tests below: two header/comment lines,
# the ``Chr`` header row naming two strains, then seven marker rows.
_GENOTYPE_FIXTURE = (
    "# Sample genotype file used by the parser tests\n"
    "@name:BXD\n"
    "Chr\tLocus\tcM\tMb\tBXD1\tBXD2\n"
    "1\trs31443144\t1.50\t3.010274\tB\tB\n"
    "2\trs27644551\t93.26\t173.542999\tD\tD\n"
    "3\trs31187985\t17.12\t41.921845\tD\tD\n"
    "4\trs30254612\t2.15\t3.718812\tB\tD\n"
    "5\tUNCHS047057\t3.10\t4.199559\tB\tB\n"
    "X\tChrXp_no_data\t1.40\t3.231738\tD\tB\n"
    "X\tAffy_17539964\t1.40\t7.947581\tD\tB\n"
)

_EXPECTED_STRAINS = ["bxd1", "bxd2"]
_EXPECTED_GENOTYPES = [
    {"chr": "1", "locus": "rs31443144",
     "cm": "1.50", "mb": "3.010274",
     "values": [-1, -1],
     "dicvalues": {'bxd1': -1, 'bxd2': -1}},
    {"chr": "2", "locus": "rs27644551",
     "cm": "93.26", "mb": "173.542999",
     "values": [1, 1],
     "dicvalues": {'bxd1': 1, 'bxd2': 1}},
    {"chr": "3", "locus": "rs31187985",
     "cm": "17.12", "mb": "41.921845",
     "values": [1, 1],
     "dicvalues": {'bxd1': 1, 'bxd2': 1}},
    {"chr": "4", "locus": "rs30254612",
     "cm": "2.15", "mb": "3.718812",
     "values": [-1, 1],
     "dicvalues": {'bxd1': -1, 'bxd2': 1}},
    {"chr": "5", "locus": "UNCHS047057",
     "cm": "3.10", "mb": "4.199559",
     "values": [-1, -1],
     "dicvalues": {'bxd1': -1, 'bxd2': -1}},
    {"chr": "X", "locus": "ChrXp_no_data",
     "cm": "1.40", "mb": "3.231738",
     "values": [1, -1],
     "dicvalues": {'bxd1': 1, 'bxd2': -1}},
    {"chr": "X", "locus": "Affy_17539964",
     "cm": "1.40", "mb": "7.947581",
     "values": [1, -1],
     "dicvalues": {'bxd1': 1, 'bxd2': -1}},
]


class TestParsers(unittest.TestCase):
    """Test cases for some various parsers"""

    def _write_genofile(self, content: str) -> str:
        """Write *content* to a temporary genotype file and return its path.

        The enclosing temporary directory is removed when the test ends.
        """
        tmpdir = tempfile.TemporaryDirectory()
        self.addCleanup(tmpdir.cleanup)
        path = os.path.join(tmpdir.name, "genotype.txt")
        with open(path, "w", encoding="utf-8") as handle:
            handle.write(content)
        return path

    def test_parse_genofile_without_existing_file(self):
        """Assert that an error is raised if the genotype file is absent"""
        self.assertRaises(FileNotFoundError, parse_genofile,
                          "/non-existent-file")

    def test_parse_genofile_with_existing_file(self):
        """Test that a genotype file is parsed correctly"""
        test_genotype_file = self._write_genofile(_GENOTYPE_FIXTURE)
        self.assertEqual(parse_genofile(test_genotype_file),
                         (_EXPECTED_STRAINS, _EXPECTED_GENOTYPES))

    def test_parse_genofile_ignores_blank_lines(self):
        """Blank or whitespace-only lines must not change the result"""
        # Put an empty line and a whitespace-only line between every row.
        padded = "\n".join(
            f"{row}\n\n   \t " for row in _GENOTYPE_FIXTURE.splitlines()
        ) + "\n"
        test_genotype_file = self._write_genofile(padded)
        self.assertEqual(parse_genofile(test_genotype_file),
                         (_EXPECTED_STRAINS, _EXPECTED_GENOTYPES))
