# Provenance: added to gn3/db/genotypes.py by commit
# b975e0cfd1d0adc5f51e66292d29d4651d3f053f, "Parse the genotype file's data
# header" (Muriithi Frederick Muriuki, Wed, 1 Sep 2021 07:35:40 +0300).
def parse_genotype_header(line: str, parlist=tuple()):
    """
    Parse the genotype file header line

    DESCRIPTION:
    Reworks
    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114

    ARGUMENTS:
    line: the tab-separated header line. Each field is stripped of
        surrounding whitespace before being inspected.
    parlist: items to put in front of the names read from the header. Any
        iterable is accepted (it is converted to a tuple); defaults to empty.

    RETURNS:
    A tuple of `(key, value)` pairs, in this order:
        ("Mbmap", bool)       - whether the header has a field named "Mb"
        ("cm_column", int)    - index of the field named "cM"
        ("mb_column", int|None) - index of the "Mb" field, `None` if absent
        ("prgy", tuple)       - `parlist` followed by every header field from
                                index 4 on (when "Mb" is present) or from
                                index 3 on (when it is not)
        ("nprgy", int)        - number of items in "prgy"

    RAISES:
    ValueError: if the header has no field named "cM".
    """
    items = tuple(item.strip() for item in line.split("\t"))
    # Locate "Mb" once; its presence decides how many leading columns are
    # skipped before the remaining fields are collected.
    mb_column = items.index("Mb") if "Mb" in items else None
    mbmap = mb_column is not None
    first_name_column = 4 if mbmap else 3
    # Coerce `parlist` so that lists (or other iterables) can be passed in
    # without the concatenation raising a TypeError.
    prgy = tuple(parlist) + items[first_name_column:]
    return (
        ("Mbmap", mbmap),
        ("cm_column", items.index("cM")),
        ("mb_column", mb_column),
        ("prgy", prgy),
        ("nprgy", len(prgy)))