From 221c773daea839ecf0e50c196484bb91e3a6db33 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 1 Sep 2021 06:18:20 +0300 Subject: Implement parsing of genotype labels Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/genotypes.py: parse genotype labels * tests/unit/db/test_genotypes.py: test that genotype labels are parsed correctly As part of parsing the genotype files into usable python data structures, this commit adds a function to parse the label lines (beginning with "@") into the appropriate values. --- tests/unit/db/test_genotypes.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/unit/db/test_genotypes.py (limited to 'tests/unit/db') diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py new file mode 100644 index 0000000..0264764 --- /dev/null +++ b/tests/unit/db/test_genotypes.py @@ -0,0 +1,17 @@ +"""Tests gn3.db.genotypes""" +from unittest import TestCase +from gn3.db.genotypes import parse_genotype_labels + +class TestGenotypes(TestCase): + """Tests for functions in `gn3.db.genotypes`.""" + + def test_parse_genotype_labels(self): + self.assertEqual( + parse_genotype_labels([ + "@name: test_group\t", "@filler: test_filler ", + "@type:test_type", "@mat:test_mat \t", "@pat:test_pat ", + "@het: test_het ", "@unk: test_unk", "@other: test_other", + "@brrr: test_brrr "]), + (("group", "test_group"), ("filler", "test_filler"), + ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"), + ("het", "test_het"), ("unk", "test_unk"))) -- cgit v1.2.3 From b975e0cfd1d0adc5f51e66292d29d4651d3f053f Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 1 Sep 2021 07:35:40 +0300 Subject: Parse the genotype file's data header * gn3/db/genotypes.py: parse data header * tests/unit/db/test_genotypes.py: check that header's parse works correctly. Add tests to check that the parser works as expected. Add code to implement the parsing and pass the tests. --- gn3/db/genotypes.py | 19 +++++++++++++++++++ tests/unit/db/test_genotypes.py | 22 +++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'tests/unit/db') diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py index 2be3e1a..be0dfc2 100644 --- a/gn3/db/genotypes.py +++ b/gn3/db/genotypes.py @@ -87,3 +87,22 @@ def parse_genotype_labels(lines: list): return tuple( item for item in (__parse_label(line) for line in lines) if item is not None) + +def parse_genotype_header(line: str, parlist = tuple()): + """ + Parse the genotype file header line + + DESCRIPTION: + Reworks + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114 + """ + items = [item.strip() for item in line.split("\t")] + Mbmap = "Mb" in items + prgy = ((parlist + tuple(items[4:])) if Mbmap + else (parlist + tuple(items[3:]))) + return ( + ("Mbmap", Mbmap), + ("cm_column", items.index("cM")), + ("mb_column", None if not Mbmap else items.index("Mb")), + ("prgy", prgy), + ("nprgy", len(prgy))) diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py index 0264764..4fa8a53 100644 --- a/tests/unit/db/test_genotypes.py +++ b/tests/unit/db/test_genotypes.py @@ -1,6 +1,6 @@ """Tests gn3.db.genotypes""" from unittest import TestCase -from gn3.db.genotypes import parse_genotype_labels +from gn3.db.genotypes import parse_genotype_labels, parse_genotype_header class TestGenotypes(TestCase): """Tests for functions in `gn3.db.genotypes`.""" @@ -15,3 +15,23 @@ class TestGenotypes(TestCase): (("group", "test_group"), ("filler", "test_filler"), ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"), ("het", "test_het"), ("unk", "test_unk"))) + + def test_parse_genotype_header(self): + for header, expected in [ + [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t" + "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"), + (("Mbmap", True), ("cm_column", 2), ("mb_column", 3), + ("prgy", + ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", + "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18", + "BXD19")), + ("nprgy", 14))], + [("Chr\tLocus\tcM\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\tBXD11" + "\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18"), + (("Mbmap", False), ("cm_column", 2), ("mb_column", None), + ("prgy", + ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11", + "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18")), + ("nprgy", 13))]]: + with self.subTest(header=header): + self.assertEqual(parse_genotype_header(header), expected) -- cgit v1.2.3 From a1c217cf277feda3815a8435d6c8909f1b5546a1 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 1 Sep 2021 09:11:17 +0300 Subject: Parse data lines into markers Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/genotypes.py: parse data lines in file to genetic markers. * tests/unit/db/test_genotypes.py: test that parsing works. Add some tests to check that the parsing of the markers works as expected, and add the code to actually parse the markers. --- gn3/db/genotypes.py | 37 +++++++++++++++++++++++++++++++++++++ tests/unit/db/test_genotypes.py | 38 +++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) (limited to 'tests/unit/db') diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py index be0dfc2..8710d2e 100644 --- a/gn3/db/genotypes.py +++ b/gn3/db/genotypes.py @@ -106,3 +106,40 @@ def parse_genotype_header(line: str, parlist = tuple()): ("mb_column", None if not Mbmap else items.index("Mb")), ("prgy", prgy), ("nprgy", len(prgy))) + +def parse_genotype_data_line(line: str, geno_obj: dict, parlist: list): + """ + Parse a data line in a genotype file + + DESCRIPTION: + Reworks + https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L143-L190 + """ + marker_row = [item.strip() for item in line.split("\t")] + geno_table = { + geno_obj["mat"]: -1, geno_obj["pat"]: 1, geno_obj["het"]: 0, + geno_obj["unk"]: "U" + } + start_pos = 4 if geno_obj["Mbmap"] else 3 + if len(parlist) > 0: + start_pos = start_pos + 2 + + alleles = marker_row[start_pos:] + genotype = tuple( + (geno_table[allele] if allele in geno_table.keys() else "U") + for allele in alleles) + if len(parlist) > 0: + genotype = (-1, 1) + genotype + try: + cM = float(geno_obj["cm_column"]) + except: + if geno_obj["Mbmap"]: + cM = float(geno_obj["mb_column"]) + else: + cM = 0 + return ( + ("chr", marker_row[0]), + ("name", marker_row[1]), + ("cM", cM), + ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None), + ("genotype", genotype)) diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py index 4fa8a53..ba90191 100644 --- a/tests/unit/db/test_genotypes.py +++ b/tests/unit/db/test_genotypes.py @@ -1,11 +1,13 @@ """Tests gn3.db.genotypes""" from unittest import TestCase -from gn3.db.genotypes import parse_genotype_labels, parse_genotype_header +from gn3.db.genotypes import ( + parse_genotype_labels, parse_genotype_header, parse_genotype_data_line) class TestGenotypes(TestCase): """Tests for functions in `gn3.db.genotypes`.""" def test_parse_genotype_labels(self): + """Test that the genotype labels are parsed correctly.""" self.assertEqual( parse_genotype_labels([ "@name: test_group\t", "@filler: test_filler ", @@ -17,6 +19,7 @@ class TestGenotypes(TestCase): ("het", "test_het"), ("unk", "test_unk"))) def test_parse_genotype_header(self): + """Test that the genotype header is parsed correctly.""" for header, expected in [ [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t" "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"), @@ -35,3 +38,36 @@ class TestGenotypes(TestCase): ("nprgy", 13))]]: with self.subTest(header=header): self.assertEqual(parse_genotype_header(header), expected) + + def test_parse_genotype_data_line(self): + """Test parsing of data lines.""" + for line, geno_obj, parlist, expected in [ + ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB", + {"mat": "test_mat", "pat": "test_pat", "het": "test_het", + "unk": "test_unk", "cm_column": 2, "Mbmap": True, + "mb_column": 3}, + tuple(), + (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", + ("U", "U", "U", "U", "U", "U", "U", "U", "U", "U")))], + ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB", + {"mat": "test_mat", "pat": "test_pat", "het": "test_het", + "unk": "test_unk", "cm_column": 2, "Mbmap": True, + "mb_column": 3}, + ("some", "parlist", "content"), + (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", + (-1, 1, "U", "U", "U", "U", "U", "U", "U", "U")))], + ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tH\tD\tB\tU\tD\tB\tB", + {"mat": "B", "pat": "D", "het": "H", "unk": "U", + "cm_column": 2, "Mbmap": True, "mb_column": 3}, + tuple(), + (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]: + with self.subTest(line = line): + self.assertEqual( + parse_genotype_data_line(line, geno_obj, parlist), + expected) -- cgit v1.2.3 From abfc0410a2385d8c3d6ee1915fc99b708e1d0dbc Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 1 Sep 2021 10:49:52 +0300 Subject: Built top-level genotype file parsing function Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * gn3/db/genotypes.py: parse genotype files * tests/unit/db/test_genotypes.py: test parsing is correct Add the overall genotype files parsing function and tests to check that the parsing works as expected. --- gn3/db/genotypes.py | 38 ++++++++++++++- tests/unit/db/test_genotypes.py | 101 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 136 insertions(+), 3 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py index 8710d2e..b5d14a5 100644 --- a/gn3/db/genotypes.py +++ b/gn3/db/genotypes.py @@ -107,7 +107,7 @@ def parse_genotype_header(line: str, parlist = tuple()): ("prgy", prgy), ("nprgy", len(prgy))) -def parse_genotype_data_line(line: str, geno_obj: dict, parlist: list): +def parse_genotype_marker(line: str, geno_obj: dict, parlist: list): """ Parse a data line in a genotype file @@ -143,3 +143,39 @@ def parse_genotype_data_line(line: str, geno_obj: dict, parlist: list): ("cM", cM), ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None), ("genotype", genotype)) + +def build_genotype_chromosomes(geno_obj, markers): + """ + Build up the chromosomes from the given markers and partially built geno + object + """ + mrks = [dict(marker) for marker in markers] + chr_names = {marker["chr"] for marker in mrks} + return tuple(( + ("name", chr_name), ("mb_exists", geno_obj["Mbmap"]), ("cm_column", 2), + ("mb_column", geno_obj["mb_column"]), + ("loci", tuple(marker for marker in mrks if marker["chr"] == chr_name))) + for chr_name in sorted(chr_names)) + +def parse_genotype_file(filename: str, parlist = tuple()): + """ + Parse the provided genotype file into a usable pytho3 data structure. + """ + with open(filename, "r") as infile: + contents = infile.readlines() + + lines = tuple(line for line in contents if + ((not line.strip().startswith("#")) and + (not line.strip() == ""))) + labels = parse_genotype_labels( + line for line in lines if line.startswith("@")) + data_lines = tuple(line for line in lines if not line.startswith("@")) + header = parse_genotype_header(data_lines[0], parlist) + geno_obj = dict(labels + header) + markers = tuple( + parse_genotype_marker(line, geno_obj, parlist) + for line in data_lines[1:]) + chromosomes = tuple( + dict(chromosome) for chromosome in + build_genotype_chromosomes(geno_obj, markers)) + return {**geno_obj, "chromosomes": chromosomes} diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py index ba90191..a05ce48 100644 --- a/tests/unit/db/test_genotypes.py +++ b/tests/unit/db/test_genotypes.py @@ -1,7 +1,11 @@ """Tests gn3.db.genotypes""" from unittest import TestCase from gn3.db.genotypes import ( - parse_genotype_labels, parse_genotype_header, parse_genotype_data_line) + parse_genotype_file, + parse_genotype_labels, + parse_genotype_header, + parse_genotype_marker, + build_genotype_chromosomes) class TestGenotypes(TestCase): """Tests for functions in `gn3.db.genotypes`.""" @@ -69,5 +73,98 @@ class TestGenotypes(TestCase): ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]: with self.subTest(line = line): self.assertEqual( - parse_genotype_data_line(line, geno_obj, parlist), + parse_genotype_marker(line, geno_obj, parlist), expected) + + def test_build_genotype_chromosomes(self): + """ + Given `markers` and `geno_obj`, test that `build_genotype_chromosomes` + builds a sequence of chromosomes with the given markers ordered + according to the `chr` value.""" + for markers, geno_obj, expected in [ + [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1))), + (("chr", "2"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", 3.0), + ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))], + {"mat": "B", "pat": "D", "het": "H", "unk": "U", + "cm_column": 2, "Mbmap": True, "mb_column": 3}, + ((("name", "1"), ("mb_exists", True), ("cm_column", 2), + ("mb_column", 3), + ("loci", + ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": 3.0, + "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))), + (("name", "2"), ("mb_exists", True), ("cm_column", 2), + ("mb_column", 3), + ("loci", + ({"chr": "2", "name": "rs31443144", "cM": 2.0, "Mb": 3.0, + "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))))], + [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), + ("Mb", None), + ("genotype", (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)))], + {"mat": "B", "pat": "D", "het": "H", "unk": "U", + "cm_column": 2, "Mbmap": False, "mb_column": None}, + ((("name", "1"), ("mb_exists", False), ("cm_column", 2), + ("mb_column", None), + ("loci", + ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None, + "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]: + with self.subTest(markers = markers): + self.assertEqual( + build_genotype_chromosomes(geno_obj, markers), + expected) + + def test_parse_genotype_file(self): + """Test the parsing of genotype files. """ + self.assertEqual( + parse_genotype_file( + "tests/unit/db/data/genotypes/genotype_sample1.geno"), + {"group": "BXD", + "type": "riset", + "mat": "B", + "pat": "D", + "het": "H", + "unk": "U", + "Mbmap": True, + "cm_column": 2, + "mb_column": 3, + "prgy": ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"), + "nprgy": 6, + "chromosomes": ( + {"name": "1", + "mb_exists": True, + "cm_column": 2, + "mb_column": 3, + "loci": ( + {"chr": "1", + "name": "rs31443144", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 1, -1) + }, + {"chr": "1", + "name": "rs6269442", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 0, "U")}, + {"chr": "1", + "name": "rs32285189", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, "U", 1, 1, 1, -1)})}, + {"name": "2", + "mb_exists": True, + "cm_column": 2, + "mb_column": 3, + "loci": ( + {"chr": "2", + "name": "rs31443144", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 1, -1)}, + {"chr": "2", + "name": "rs6269442", + "cM": 2.0, + "Mb": 3.0, + "genotype": (-1, -1, 1, 1, 0, "U")})})}) -- cgit v1.2.3 From 3ded952f40f486d9aa69746eac2afe7f67fef790 Mon Sep 17 00:00:00 2001 From: Muriithi Frederick Muriuki Date: Wed, 1 Sep 2021 11:08:38 +0300 Subject: Fix linting and typing issues Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi --- gn3/db/genotypes.py | 32 ++++++++++++++++---------------- tests/unit/db/test_genotypes.py | 10 +++++----- 2 files changed, 21 insertions(+), 21 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py index b5d14a5..b03d55c 100644 --- a/gn3/db/genotypes.py +++ b/gn3/db/genotypes.py @@ -88,7 +88,7 @@ def parse_genotype_labels(lines: list): item for item in (__parse_label(line) for line in lines) if item is not None) -def parse_genotype_header(line: str, parlist = tuple()): +def parse_genotype_header(line: str, parlist: tuple = tuple()): """ Parse the genotype file header line @@ -97,13 +97,13 @@ def parse_genotype_header(line: str, parlist = tuple()): https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114 """ items = [item.strip() for item in line.split("\t")] - Mbmap = "Mb" in items - prgy = ((parlist + tuple(items[4:])) if Mbmap + mbmap = "Mb" in items + prgy = ((parlist + tuple(items[4:])) if mbmap else (parlist + tuple(items[3:]))) return ( - ("Mbmap", Mbmap), + ("Mbmap", mbmap), ("cm_column", items.index("cM")), - ("mb_column", None if not Mbmap else items.index("Mb")), + ("mb_column", None if not mbmap else items.index("Mb")), ("prgy", prgy), ("nprgy", len(prgy))) @@ -131,16 +131,16 @@ def parse_genotype_marker(line: str, geno_obj: dict, parlist: list): if len(parlist) > 0: genotype = (-1, 1) + genotype try: - cM = float(geno_obj["cm_column"]) + cm_val = float(geno_obj["cm_column"]) except: if geno_obj["Mbmap"]: - cM = float(geno_obj["mb_column"]) + cm_val = float(geno_obj["mb_column"]) else: - cM = 0 + cm_val = 0 return ( ("chr", marker_row[0]), ("name", marker_row[1]), - ("cM", cM), + ("cM", cm_val), ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None), ("genotype", genotype)) @@ -155,9 +155,9 @@ def build_genotype_chromosomes(geno_obj, markers): ("name", chr_name), ("mb_exists", geno_obj["Mbmap"]), ("cm_column", 2), ("mb_column", geno_obj["mb_column"]), ("loci", tuple(marker for marker in mrks if marker["chr"] == chr_name))) - for chr_name in sorted(chr_names)) + for chr_name in sorted(chr_names)) -def parse_genotype_file(filename: str, parlist = tuple()): +def parse_genotype_file(filename: str, parlist: tuple = tuple()): """ Parse the provided genotype file into a usable pytho3 data structure. """ @@ -165,16 +165,16 @@ def parse_genotype_file(filename: str, parlist = tuple()): contents = infile.readlines() lines = tuple(line for line in contents if - ((not line.strip().startswith("#")) and - (not line.strip() == ""))) + ((not line.strip().startswith("#")) and + (not line.strip() == ""))) labels = parse_genotype_labels( - line for line in lines if line.startswith("@")) + [line for line in lines if line.startswith("@")]) data_lines = tuple(line for line in lines if not line.startswith("@")) header = parse_genotype_header(data_lines[0], parlist) geno_obj = dict(labels + header) markers = tuple( - parse_genotype_marker(line, geno_obj, parlist) - for line in data_lines[1:]) + [parse_genotype_marker(line, geno_obj, parlist) + for line in data_lines[1:]]) chromosomes = tuple( dict(chromosome) for chromosome in build_genotype_chromosomes(geno_obj, markers)) diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py index a05ce48..c125224 100644 --- a/tests/unit/db/test_genotypes.py +++ b/tests/unit/db/test_genotypes.py @@ -18,9 +18,9 @@ class TestGenotypes(TestCase): "@type:test_type", "@mat:test_mat \t", "@pat:test_pat ", "@het: test_het ", "@unk: test_unk", "@other: test_other", "@brrr: test_brrr "]), - (("group", "test_group"), ("filler", "test_filler"), - ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"), - ("het", "test_het"), ("unk", "test_unk"))) + (("group", "test_group"), ("filler", "test_filler"), + ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"), + ("het", "test_het"), ("unk", "test_unk"))) def test_parse_genotype_header(self): """Test that the genotype header is parsed correctly.""" @@ -71,7 +71,7 @@ class TestGenotypes(TestCase): (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0), ("Mb", 3.0), ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]: - with self.subTest(line = line): + with self.subTest(line=line): self.assertEqual( parse_genotype_marker(line, geno_obj, parlist), expected) @@ -110,7 +110,7 @@ class TestGenotypes(TestCase): ("loci", ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None, "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]: - with self.subTest(markers = markers): + with self.subTest(markers=markers): self.assertEqual( build_genotype_chromosomes(geno_obj, markers), expected) -- cgit v1.2.3 From ed2e4c0f9d68cfb720da95eba559d69359f7b5fc Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 15 Sep 2021 05:35:34 +0300 Subject: Add missing sample file for tests * tests/unit/db/data/genotypes/genotype_sample1.geno: new file Add a missing sample data file needed for unit tests. --- tests/unit/db/data/genotypes/genotype_sample1.geno | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/unit/db/data/genotypes/genotype_sample1.geno (limited to 'tests/unit/db') diff --git a/tests/unit/db/data/genotypes/genotype_sample1.geno b/tests/unit/db/data/genotypes/genotype_sample1.geno new file mode 100644 index 0000000..2a55964 --- /dev/null +++ b/tests/unit/db/data/genotypes/genotype_sample1.geno @@ -0,0 +1,23 @@ +# File name: genotype_sample for testing + +# Metadata: Please retain this header information with file. + + +@name: BXD +@type: riset +@mat: B +@pat: D +@het:H +@unk: U + + + + + + +Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8 BXD9 +1 rs31443144 1.50 3.010274 B B D D D B +1 rs6269442 1.50 3.492195 B B D D H Y +1 rs32285189 1.63 3.511204 B U D D D B +2 rs31443144 1.50 3.010274 B B D D D B +2 rs6269442 1.50 3.492195 B B D D H Y \ No newline at end of file -- cgit v1.2.3 From 56c73324c285d896567268370f3955bbd15754b0 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Wed, 22 Sep 2021 09:02:46 +0300 Subject: Fix more pylint errors --- gn3/computations/qtlreaper.py | 3 ++- gn3/db/genotypes.py | 2 +- tests/unit/db/test_traits.py | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py index 5ddea76..8b2893e 100644 --- a/gn3/computations/qtlreaper.py +++ b/gn3/computations/qtlreaper.py @@ -74,7 +74,8 @@ def run_reaper( if separate_nperm_output: permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format( output_dir, random_string(10)) - output_list = output_list + ["--permu_output", permu_output_filename] # type: ignore[list-item] + output_list = output_list + [ + "--permu_output", permu_output_filename] # type: ignore[list-item] else: permu_output_filename = None diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py index 9ea9f20..9987320 100644 --- a/gn3/db/genotypes.py +++ b/gn3/db/genotypes.py @@ -56,7 +56,7 @@ def __load_genotype_samples_from_geno(genotype_filename: str): continue break - headers = line.split("\t" ) # type: ignore[arg-type] + headers = line.split("\t") # type: ignore[arg-type] if headers[3] == "Mb": return headers[4:] return headers[3:] diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index ee98893..baa2af3 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -166,6 +166,7 @@ class TestTraitsDBFunctions(TestCase): the right calls. """ + # pylint: disable=C0103 db_mock = mock.MagicMock() STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" -- cgit v1.2.3 From 95c5c0e73bffbf0287a17309e703063ee54d25ba Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 23 Sep 2021 03:45:19 +0300 Subject: Refactor: Move common sample data to separate file Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Move common sample test data into a separate file where it can be imported from, to prevent pylint error R0801 which proved tricky to silence in any other way. --- tests/unit/computations/test_qtlreaper.py | 68 ++++-------------- tests/unit/db/test_traits.py | 15 ++-- tests/unit/sample_test_data.py | 111 ++++++++++++++++++++++++++++++ tests/unit/test_heatmaps.py | 96 +------------------------- 4 files changed, 134 insertions(+), 156 deletions(-) create mode 100644 tests/unit/sample_test_data.py (limited to 'tests/unit/db') diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py index d420470..742d106 100644 --- a/tests/unit/computations/test_qtlreaper.py +++ b/tests/unit/computations/test_qtlreaper.py @@ -4,6 +4,7 @@ from gn3.computations.qtlreaper import ( parse_reaper_main_results, organise_reaper_main_results, parse_reaper_permutation_results) +from tests.unit.sample_test_data import organised_trait_1 class TestQTLReaper(TestCase): """Class for testing qtlreaper interface functions.""" @@ -81,99 +82,54 @@ class TestQTLReaper(TestCase): self.assertEqual( organise_reaper_main_results([ { - "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500, + "ID": "1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500, "Mb": 3.010, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500, + "ID": "1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500, "Mb": 3.492, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630, + "ID": "1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630, "Mb": 3.511, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630, + "ID": "1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630, "Mb": 3.660, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750, + "ID": "1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750, "Mb": 3.777, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880, + "ID": "1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880, "Mb": 3.812, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010, + "ID": "1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010, "Mb": 4.431, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010, + "ID": "1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010, "Mb": 4.447, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140, + "ID": "1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140, "Mb": 4.519, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 }, { - "ID": "T1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140, + "ID": "1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140, "Mb": 4.776, "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 } ]), - {"T1": {"ID": "T1", - "chromosomes": { - 1: {"Chr": 1, - "loci": [ - { - "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }]}, - 2: {"Chr": 2, - "loci": [ - { - "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }]}}}}) + organised_trait_1) diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py index baa2af3..8af8e82 100644 --- a/tests/unit/db/test_traits.py +++ b/tests/unit/db/test_traits.py @@ -170,12 +170,15 @@ class TestTraitsDBFunctions(TestCase): db_mock = mock.MagicMock() STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s" - PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s " - "WHERE StrainId = %s AND Id = %s") - PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s " - "WHERE StrainId = %s AND DataId = %s") - N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s " - "WHERE StrainId = %s AND DataId = %s") + PUBLISH_DATA_SQL: str = ( + "UPDATE PublishData SET value = %s " + "WHERE StrainId = %s AND Id = %s") + PUBLISH_SE_SQL: str = ( + "UPDATE PublishSE SET error = %s " + "WHERE StrainId = %s AND DataId = %s") + N_STRAIN_SQL: str = ( + "UPDATE NStrain SET count = %s " + "WHERE StrainId = %s AND DataId = %s") with db_mock.cursor() as cursor: type(cursor).rowcount = 1 diff --git a/tests/unit/sample_test_data.py b/tests/unit/sample_test_data.py new file mode 100644 index 0000000..407d074 --- /dev/null +++ b/tests/unit/sample_test_data.py @@ -0,0 +1,111 @@ +""" +This module holds a collection of sample data variables, used in more than one + test. + +This is mostly to avoid the `duplicate-code` pylint error that gets raised if +the same data is defined in more than one file. It has been found that adding +the `# pylint: disable=R0801` or `# pylint: disable=duplicate-code` to the top +of the file seems to not work as expected. + +Adding these same declarations to .pylintrc is not an option, since that, +seemingly, would deactivate the warnings for all code in the project: We do not +want that. +""" + +organised_trait_1 = { + "1": { + "ID": "1", + "chromosomes": { + 1: {"Chr": 1, + "loci": [ + { + "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }]}, + 2: {"Chr": 2, + "loci": [ + { + "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }]}}}} + +organised_trait_2 = { + "2": { + "ID": "2", + "chromosomes": { + 1: {"Chr": 1, + "loci": [ + { + "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }]}, + 2: {"Chr": 2, + "loci": [ + { + "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, + "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 + }, + { + "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, + "LRS": 0.579, "Additive": -0.074, "pValue": 1.000 + }]}}}} diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py index c0a496b..fd91cf9 100644 --- a/tests/unit/test_heatmaps.py +++ b/tests/unit/test_heatmaps.py @@ -7,6 +7,7 @@ from gn3.heatmaps import ( compute_traits_order, retrieve_strains_and_values, process_traits_data_for_heatmap) +from tests.unit.sample_test_data import organised_trait_1, organised_trait_2 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"] trait_data = { @@ -206,100 +207,7 @@ class TestHeatmap(TestCase): """Check for correct processing of data for heatmap generation.""" self.assertEqual( process_traits_data_for_heatmap( - {"1": { - "ID": "T1", - "chromosomes": { - 1: {"Chr": 1, - "loci": [ - { - "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }]}, - 2: {"Chr": 2, - "loci": [ - { - "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }]}}}, - "2": { - "ID": "T1", - "chromosomes": { - 1: {"Chr": 1, - "loci": [ - { - "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }]}, - 2: {"Chr": 2, - "loci": [ - { - "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519, - "LRS": 0.500, "Additive": -0.074, "pValue": 1.000 - }, - { - "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776, - "LRS": 0.579, "Additive": -0.074, "pValue": 1.000 - }]}}}}, + {**organised_trait_1, **organised_trait_2}, ["2", "1"], [1, 2]), [[[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], -- cgit v1.2.3 From 1d09a9222f8c661da3abd6d61c09ae19eeb5d793 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 27 Sep 2021 05:02:09 +0300 Subject: Update terminology: `riset` to `group` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi * Update terminology to use the appropriate domain terminology according to Zachary's direction at https://github.com/genenetwork/genenetwork3/pull/37#issuecomment-926041744 --- gn3/db/datasets.py | 52 +++++++++++++++++++++--------------------- gn3/db/traits.py | 16 ++++++------- gn3/heatmaps.py | 2 +- tests/unit/db/test_datasets.py | 42 +++++++++++++++++----------------- 4 files changed, 56 insertions(+), 56 deletions(-) (limited to 'tests/unit/db') diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py index 4a05499..6c328f5 100644 --- a/gn3/db/datasets.py +++ b/gn3/db/datasets.py @@ -119,9 +119,9 @@ def retrieve_dataset_name( return fn_map[trait_type](threshold, dataset_name, conn) -def retrieve_geno_riset_fields(name, conn): +def retrieve_geno_group_fields(name, conn): """ - Retrieve the RISet, and RISetID values for various Geno trait types. + Retrieve the Group, and GroupID values for various Geno trait types. """ query = ( "SELECT InbredSet.Name, InbredSet.Id " @@ -130,12 +130,12 @@ def retrieve_geno_riset_fields(name, conn): "AND GenoFreeze.Name = %(name)s") with conn.cursor() as cursor: cursor.execute(query, {"name": name}) - return dict(zip(["riset", "risetid"], cursor.fetchone())) + return dict(zip(["group", "groupid"], cursor.fetchone())) return {} -def retrieve_publish_riset_fields(name, conn): +def retrieve_publish_group_fields(name, conn): """ - Retrieve the RISet, and RISetID values for various Publish trait types. + Retrieve the Group, and GroupID values for various Publish trait types. """ query = ( "SELECT InbredSet.Name, InbredSet.Id " @@ -144,12 +144,12 @@ def retrieve_publish_riset_fields(name, conn): "AND PublishFreeze.Name = %(name)s") with conn.cursor() as cursor: cursor.execute(query, {"name": name}) - return dict(zip(["riset", "risetid"], cursor.fetchone())) + return dict(zip(["group", "groupid"], cursor.fetchone())) return {} -def retrieve_probeset_riset_fields(name, conn): +def retrieve_probeset_group_fields(name, conn): """ - Retrieve the RISet, and RISetID values for various ProbeSet trait types. + Retrieve the Group, and GroupID values for various ProbeSet trait types. """ query = ( "SELECT InbredSet.Name, InbredSet.Id " @@ -159,12 +159,12 @@ def retrieve_probeset_riset_fields(name, conn): "AND ProbeSetFreeze.Name = %(name)s") with conn.cursor() as cursor: cursor.execute(query, {"name": name}) - return dict(zip(["riset", "risetid"], cursor.fetchone())) + return dict(zip(["group", "groupid"], cursor.fetchone())) return {} -def retrieve_temp_riset_fields(name, conn): +def retrieve_temp_group_fields(name, conn): """ - Retrieve the RISet, and RISetID values for `Temp` trait types. + Retrieve the Group, and GroupID values for `Temp` trait types. """ query = ( "SELECT InbredSet.Name, InbredSet.Id " @@ -173,30 +173,30 @@ def retrieve_temp_riset_fields(name, conn): "AND Temp.Name = %(name)s") with conn.cursor() as cursor: cursor.execute(query, {"name": name}) - return dict(zip(["riset", "risetid"], cursor.fetchone())) + return dict(zip(["group", "groupid"], cursor.fetchone())) return {} -def retrieve_riset_fields(trait_type, trait_name, dataset_info, conn): +def retrieve_group_fields(trait_type, trait_name, dataset_info, conn): """ - Retrieve the RISet, and RISetID values for various trait types. + Retrieve the Group, and GroupID values for various trait types. """ - riset_fns_map = { - "Geno": retrieve_geno_riset_fields, - "Publish": retrieve_publish_riset_fields, - "ProbeSet": retrieve_probeset_riset_fields + group_fns_map = { + "Geno": retrieve_geno_group_fields, + "Publish": retrieve_publish_group_fields, + "ProbeSet": retrieve_probeset_group_fields } if trait_type == "Temp": - riset_info = retrieve_temp_riset_fields(trait_name, conn) + group_info = retrieve_temp_group_fields(trait_name, conn) else: - riset_info = riset_fns_map[trait_type](dataset_info["dataset_name"], conn) + group_info = group_fns_map[trait_type](dataset_info["dataset_name"], conn) return { **dataset_info, - **riset_info, - "riset": ( - "BXD" if riset_info.get("riset") == "BXD300" - else riset_info.get("riset", "")) + **group_info, + "group": ( + "BXD" if group_info.get("group") == "BXD300" + else group_info.get("group", "")) } def retrieve_temp_trait_dataset(): @@ -281,11 +281,11 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn): trait_type, threshold, trait["trait_name"], trait["db"]["dataset_name"], conn) } - riset = retrieve_riset_fields( + group = retrieve_group_fields( trait_type, trait["trait_name"], dataset_name_info, conn) return { "display_name": dataset_name_info["dataset_name"], **dataset_name_info, **dataset_fns[trait_type](), - **riset + **group } diff --git a/gn3/db/traits.py b/gn3/db/traits.py index c9d05d7..f2673c8 100644 --- a/gn3/db/traits.py +++ b/gn3/db/traits.py @@ -226,7 +226,7 @@ def set_homologene_id_field_probeset(trait_info, conn): """ query = ( "SELECT HomologeneId FROM Homologene, Species, InbredSet" - " WHERE Homologene.GeneId = %(geneid)s AND InbredSet.Name = %(riset)s" + " WHERE Homologene.GeneId = %(geneid)s AND InbredSet.Name = %(group)s" " AND InbredSet.SpeciesId = Species.Id AND" " Species.TaxonomyId = Homologene.TaxonomyId") with conn.cursor() as cursor: @@ -234,7 +234,7 @@ def set_homologene_id_field_probeset(trait_info, conn): query, { k:v for k, v in trait_info.items() - if k in ["geneid", "riset"] + if k in ["geneid", "group"] }) res = cursor.fetchone() if res: @@ -422,7 +422,7 @@ def retrieve_trait_info( if trait_info["haveinfo"]: return { **trait_post_processing_functions_table[trait_dataset_type]( - {**trait_info, "riset": trait_dataset["riset"]}), + {**trait_info, "group": trait_dataset["group"]}), "db": {**trait["db"], **trait_dataset} } return trait_info @@ -449,14 +449,14 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any): for row in cursor.fetchall()] return [] -def retrieve_species_id(riset, conn: Any): +def retrieve_species_id(group, conn: Any): """ - Retrieve a species id given the RISet value + Retrieve a species id given the Group value """ with conn.cursor as cursor: cursor.execute( - "SELECT SpeciesId from InbredSet WHERE Name = %(riset)s", - {"riset": riset}) + "SELECT SpeciesId from InbredSet WHERE Name = %(group)s", + {"group": group}) return cursor.fetchone()[0] return None @@ -482,7 +482,7 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any): {"trait_name": trait_info["trait_name"], "dataset_name": trait_info["db"]["dataset_name"], "species_id": retrieve_species_id( - trait_info["db"]["riset"], conn)}) + trait_info["db"]["group"], conn)}) return [dict(zip( ["sample_name", "value", "se_error", "id"], row)) for row in cursor.fetchall()] diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py index b6fc6d3..a36940d 100644 --- a/gn3/heatmaps.py +++ b/gn3/heatmaps.py @@ -164,7 +164,7 @@ def build_heatmap(traits_names, conn: Any): retrieve_trait_info(threshold, fullname, conn) for fullname in traits_names] traits_data_list = [retrieve_trait_data(t, conn) for t in traits] - genotype_filename = build_genotype_file(traits[0]["riset"]) + genotype_filename = build_genotype_file(traits[0]["group"]) samples = load_genotype_samples(genotype_filename) exported_traits_data_list = [ export_trait_data(td, samples) for td in traits_data_list] diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py index 38de0e2..39f4af9 100644 --- a/tests/unit/db/test_datasets.py +++ b/tests/unit/db/test_datasets.py @@ -3,10 +3,10 @@ from unittest import mock, TestCase from gn3.db.datasets import ( retrieve_dataset_name, - retrieve_riset_fields, - retrieve_geno_riset_fields, - retrieve_publish_riset_fields, - retrieve_probeset_riset_fields) + retrieve_group_fields, + retrieve_geno_group_fields, + retrieve_publish_group_fields, + retrieve_probeset_group_fields) class TestDatasetsDBFunctions(TestCase): """Test cases for datasets functions.""" @@ -40,9 +40,9 @@ class TestDatasetsDBFunctions(TestCase): table=table, cols=columns), {"threshold": thresh, "name": dataset_name}) - def test_retrieve_probeset_riset_fields(self): + def test_retrieve_probeset_group_fields(self): """ - Test that the `riset` and `riset_id` fields are retrieved appropriately + Test that the `group` and `group_id` fields are retrieved appropriately for the 'ProbeSet' trait type. """ for trait_name, expected in [ @@ -52,7 +52,7 @@ class TestDatasetsDBFunctions(TestCase): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - retrieve_probeset_riset_fields(trait_name, db_mock), + retrieve_probeset_group_fields(trait_name, db_mock), expected) cursor.execute.assert_called_once_with( ( @@ -63,34 +63,34 @@ class TestDatasetsDBFunctions(TestCase): " AND ProbeSetFreeze.Name = %(name)s"), {"name": trait_name}) - def test_retrieve_riset_fields(self): + def test_retrieve_group_fields(self): """ - Test that the riset fields are set up correctly for the different trait + Test that the group fields are set up correctly for the different trait types. """ for trait_type, trait_name, dataset_info, expected in [ ["Publish", "pubTraitName01", {"dataset_name": "pubDBName01"}, - {"dataset_name": "pubDBName01", "riset": ""}], + {"dataset_name": "pubDBName01", "group": ""}], ["ProbeSet", "prbTraitName01", {"dataset_name": "prbDBName01"}, - {"dataset_name": "prbDBName01", "riset": ""}], + {"dataset_name": "prbDBName01", "group": ""}], ["Geno", "genoTraitName01", {"dataset_name": "genoDBName01"}, - {"dataset_name": "genoDBName01", "riset": ""}], - ["Temp", "tempTraitName01", {}, {"riset": ""}], + {"dataset_name": "genoDBName01", "group": ""}], + ["Temp", "tempTraitName01", {}, {"group": ""}], ]: db_mock = mock.MagicMock() with self.subTest( trait_type=trait_type, trait_name=trait_name, dataset_info=dataset_info): with db_mock.cursor() as cursor: - cursor.execute.return_value = ("riset_name", 0) + cursor.execute.return_value = ("group_name", 0) self.assertEqual( - retrieve_riset_fields( + retrieve_group_fields( trait_type, trait_name, dataset_info, db_mock), expected) - def test_retrieve_publish_riset_fields(self): + def test_retrieve_publish_group_fields(self): """ - Test that the `riset` and `riset_id` fields are retrieved appropriately + Test that the `group` and `group_id` fields are retrieved appropriately for the 'Publish' trait type. """ for trait_name, expected in [ @@ -100,7 +100,7 @@ class TestDatasetsDBFunctions(TestCase): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - retrieve_publish_riset_fields(trait_name, db_mock), + retrieve_publish_group_fields(trait_name, db_mock), expected) cursor.execute.assert_called_once_with( ( @@ -110,9 +110,9 @@ class TestDatasetsDBFunctions(TestCase): " AND PublishFreeze.Name = %(name)s"), {"name": trait_name}) - def test_retrieve_geno_riset_fields(self): + def test_retrieve_geno_group_fields(self): """ - Test that the `riset` and `riset_id` fields are retrieved appropriately + Test that the `group` and `group_id` fields are retrieved appropriately for the 'Geno' trait type. """ for trait_name, expected in [ @@ -122,7 +122,7 @@ class TestDatasetsDBFunctions(TestCase): with db_mock.cursor() as cursor: cursor.execute.return_value = () self.assertEqual( - retrieve_geno_riset_fields(trait_name, db_mock), + retrieve_geno_group_fields(trait_name, db_mock), expected) cursor.execute.assert_called_once_with( ( -- cgit v1.2.3