Merge branch 'main' of https://github.com/genenetwork/genenetwork3 into feature/add_rqtl_pairscan

author: zsloan 2021-10-12 18:56:34 +0000
committer: zsloan 2021-10-12 18:56:34 +0000
commit: 0f396f4a1a753d449cf2975fc425d587d9350689 (patch)
tree: c9dac243dc05e5cb90ccb7f1d96fd599986bf60a /tests/unit/db
parent: 976660ce9ef915426c7ce5ff9077b439e4102a2c (diff)
parent: 77c274b79c3ec01de60e90db3299763cb58f715b (diff)
download: genenetwork3-0f396f4a1a753d449cf2975fc425d587d9350689.tar.gz
4 files changed, 224 insertions, 27 deletions
diff --git a/tests/unit/db/data/genotypes/genotype_sample1.geno b/tests/unit/db/data/genotypes/genotype_sample1.geno
new file mode 100644
index 0000000..2a55964
--- /dev/null
+++ b/tests/unit/db/data/genotypes/genotype_sample1.geno
@@ -0,0 +1,23 @@
+# File name: genotype_sample for testing
+
+# Metadata: Please retain this header information with file.
+
+
+@name: BXD
+@type: riset
+@mat:     B
+@pat: D
+@het:H
+@unk: U
+
+
+
+
+
+
+Chr	Locus	cM	Mb	BXD1	BXD2	BXD5	BXD6	BXD8	BXD9
+1	rs31443144	1.50	3.010274	B	B	D	D	D	B
+1	rs6269442	1.50	3.492195	B	B	D	D	H	Y
+1	rs32285189	1.63	3.511204	B	U	D	D	D	B
+2	rs31443144	1.50	3.010274	B	B	D	D	D	B
+2	rs6269442	1.50	3.492195	B	B	D	D	H	Y
+\ No newline at end of file
diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py
index 38de0e2..39f4af9 100644
--- a/tests/unit/db/test_datasets.py
+++ b/tests/unit/db/test_datasets.py
@@ -3,10 +3,10 @@
 from unittest import mock, TestCase
 from gn3.db.datasets import (
     retrieve_dataset_name,
-    retrieve_riset_fields,
-    retrieve_geno_riset_fields,
-    retrieve_publish_riset_fields,
-    retrieve_probeset_riset_fields)
+    retrieve_group_fields,
+    retrieve_geno_group_fields,
+    retrieve_publish_group_fields,
+    retrieve_probeset_group_fields)
 
 class TestDatasetsDBFunctions(TestCase):
     """Test cases for datasets functions."""
@@ -40,9 +40,9 @@ class TestDatasetsDBFunctions(TestCase):
                             table=table, cols=columns),
                         {"threshold": thresh, "name": dataset_name})
 
-    def test_retrieve_probeset_riset_fields(self):
+    def test_retrieve_probeset_group_fields(self):
         """
-        Test that the `riset` and `riset_id` fields are retrieved appropriately
+        Test that the `group` and `group_id` fields are retrieved appropriately
         for the 'ProbeSet' trait type.
         """
         for trait_name, expected in [
@@ -52,7 +52,7 @@ class TestDatasetsDBFunctions(TestCase):
                 with db_mock.cursor() as cursor:
                     cursor.execute.return_value = ()
                     self.assertEqual(
-                        retrieve_probeset_riset_fields(trait_name, db_mock),
+                        retrieve_probeset_group_fields(trait_name, db_mock),
                         expected)
                     cursor.execute.assert_called_once_with(
                         (
@@ -63,34 +63,34 @@ class TestDatasetsDBFunctions(TestCase):
                             " AND ProbeSetFreeze.Name = %(name)s"),
                         {"name": trait_name})
 
-    def test_retrieve_riset_fields(self):
+    def test_retrieve_group_fields(self):
         """
-        Test that the riset fields are set up correctly for the different trait
+        Test that the group fields are set up correctly for the different trait
         types.
         """
         for trait_type, trait_name, dataset_info, expected in [
                 ["Publish", "pubTraitName01", {"dataset_name": "pubDBName01"},
-                 {"dataset_name": "pubDBName01", "riset": ""}],
+                 {"dataset_name": "pubDBName01", "group": ""}],
                 ["ProbeSet", "prbTraitName01", {"dataset_name": "prbDBName01"},
-                 {"dataset_name": "prbDBName01", "riset": ""}],
+                 {"dataset_name": "prbDBName01", "group": ""}],
                 ["Geno", "genoTraitName01", {"dataset_name": "genoDBName01"},
-                 {"dataset_name": "genoDBName01", "riset": ""}],
-                ["Temp", "tempTraitName01", {}, {"riset": ""}],
+                 {"dataset_name": "genoDBName01", "group": ""}],
+                ["Temp", "tempTraitName01", {}, {"group": ""}],
                 ]:
             db_mock = mock.MagicMock()
             with self.subTest(
                     trait_type=trait_type, trait_name=trait_name,
                     dataset_info=dataset_info):
                 with db_mock.cursor() as cursor:
-                    cursor.execute.return_value = ("riset_name", 0)
+                    cursor.execute.return_value = ("group_name", 0)
                     self.assertEqual(
-                        retrieve_riset_fields(
+                        retrieve_group_fields(
                             trait_type, trait_name, dataset_info, db_mock),
                         expected)
 
-    def test_retrieve_publish_riset_fields(self):
+    def test_retrieve_publish_group_fields(self):
         """
-        Test that the `riset` and `riset_id` fields are retrieved appropriately
+        Test that the `group` and `group_id` fields are retrieved appropriately
         for the 'Publish' trait type.
         """
         for trait_name, expected in [
@@ -100,7 +100,7 @@ class TestDatasetsDBFunctions(TestCase):
                 with db_mock.cursor() as cursor:
                     cursor.execute.return_value = ()
                     self.assertEqual(
-                        retrieve_publish_riset_fields(trait_name, db_mock),
+                        retrieve_publish_group_fields(trait_name, db_mock),
                         expected)
                     cursor.execute.assert_called_once_with(
                         (
@@ -110,9 +110,9 @@ class TestDatasetsDBFunctions(TestCase):
                             " AND PublishFreeze.Name = %(name)s"),
                         {"name": trait_name})
 
-    def test_retrieve_geno_riset_fields(self):
+    def test_retrieve_geno_group_fields(self):
         """
-        Test that the `riset` and `riset_id` fields are retrieved appropriately
+        Test that the `group` and `group_id` fields are retrieved appropriately
         for the 'Geno' trait type.
         """
         for trait_name, expected in [
@@ -122,7 +122,7 @@ class TestDatasetsDBFunctions(TestCase):
                 with db_mock.cursor() as cursor:
                     cursor.execute.return_value = ()
                     self.assertEqual(
-                        retrieve_geno_riset_fields(trait_name, db_mock),
+                        retrieve_geno_group_fields(trait_name, db_mock),
                         expected)
                     cursor.execute.assert_called_once_with(
                         (
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
new file mode 100644
index 0000000..c125224
--- /dev/null
+++ b/tests/unit/db/test_genotypes.py
@@ -0,0 +1,170 @@
+"""Tests gn3.db.genotypes"""
+from unittest import TestCase
+from gn3.db.genotypes import (
+    parse_genotype_file,
+    parse_genotype_labels,
+    parse_genotype_header,
+    parse_genotype_marker,
+    build_genotype_chromosomes)
+
+class TestGenotypes(TestCase):
+    """Tests for functions in `gn3.db.genotypes`."""
+
+    def test_parse_genotype_labels(self):
+        """Test that the genotype labels are parsed correctly."""
+        self.assertEqual(
+            parse_genotype_labels([
+                "@name: test_group\t", "@filler: test_filler    ",
+                "@type:test_type", "@mat:test_mat   \t", "@pat:test_pat ",
+                "@het: test_het ", "@unk: test_unk", "@other: test_other",
+                "@brrr: test_brrr "]),
+            (("group", "test_group"), ("filler", "test_filler"),
+             ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
+             ("het", "test_het"), ("unk", "test_unk")))
+
+    def test_parse_genotype_header(self):
+        """Test that the genotype header is parsed correctly."""
+        for header, expected in [
+                [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t"
+                  "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"),
+                 (("Mbmap", True), ("cm_column", 2), ("mb_column", 3),
+                  ("prgy",
+                   ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
+                    "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18",
+                    "BXD19")),
+                  ("nprgy", 14))],
+                [("Chr\tLocus\tcM\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\tBXD11"
+                  "\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18"),
+                 (("Mbmap", False), ("cm_column", 2), ("mb_column", None),
+                  ("prgy",
+                   ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
+                    "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18")),
+                  ("nprgy", 13))]]:
+            with self.subTest(header=header):
+                self.assertEqual(parse_genotype_header(header), expected)
+
+    def test_parse_genotype_data_line(self):
+        """Test that `parse_genotype_marker` parses marker data lines."""
+        for line, geno_obj, parlist, expected in [
+                ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB",
+                 {"mat": "test_mat", "pat": "test_pat", "het": "test_het",
+                  "unk": "test_unk", "cm_column": 2, "Mbmap": True,
+                  "mb_column": 3},
+                 tuple(),
+                 (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                  ("Mb", 3.0),
+                  ("genotype",
+                   ("U", "U", "U", "U", "U", "U", "U", "U", "U", "U")))],
+                ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB",
+                 {"mat": "test_mat", "pat": "test_pat", "het": "test_het",
+                  "unk": "test_unk", "cm_column": 2, "Mbmap": True,
+                  "mb_column": 3},
+                 ("some", "parlist", "content"),
+                 (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                  ("Mb", 3.0),
+                  ("genotype",
+                   (-1, 1, "U", "U", "U", "U", "U", "U", "U", "U")))],
+                ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tH\tD\tB\tU\tD\tB\tB",
+                 {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+                  "cm_column": 2, "Mbmap": True, "mb_column": 3},
+                 tuple(),
+                 (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                  ("Mb", 3.0),
+                  ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]:
+            with self.subTest(line=line):
+                self.assertEqual(
+                    parse_genotype_marker(line, geno_obj, parlist),
+                    expected)
+
+    def test_build_genotype_chromosomes(self):
+        """
+        Given `markers` and `geno_obj`, test that `build_genotype_chromosomes`
+        builds a sequence of chromosomes with the given markers ordered
+        according to the `chr` value."""
+        for markers, geno_obj, expected in [
+                [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                   ("Mb", 3.0),
+                   ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1))),
+                  (("chr", "2"), ("name", "rs31443144"), ("cM", 2.0),
+                   ("Mb", 3.0),
+                   ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))],
+                 {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+                  "cm_column": 2, "Mbmap": True, "mb_column": 3},
+                 ((("name", "1"), ("mb_exists", True), ("cm_column", 2),
+                   ("mb_column", 3),
+                   ("loci",
+                    ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": 3.0,
+                      "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))),
+                  (("name", "2"), ("mb_exists", True), ("cm_column", 2),
+                   ("mb_column", 3),
+                   ("loci",
+                    ({"chr": "2", "name": "rs31443144", "cM": 2.0, "Mb": 3.0,
+                      "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))))],
+                [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                   ("Mb", None),
+                   ("genotype", (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)))],
+                 {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+                  "cm_column": 2, "Mbmap": False, "mb_column": None},
+                 ((("name", "1"), ("mb_exists", False), ("cm_column", 2),
+                   ("mb_column", None),
+                   ("loci",
+                    ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None,
+                      "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]:
+            with self.subTest(markers=markers):
+                self.assertEqual(
+                    build_genotype_chromosomes(geno_obj, markers),
+                    expected)
+
+    def test_parse_genotype_file(self):
+        """Test the parsing of genotype files."""
+        self.assertEqual(
+            parse_genotype_file(
+                "tests/unit/db/data/genotypes/genotype_sample1.geno"),
+            {"group": "BXD",
+             "type": "riset",
+             "mat": "B",
+             "pat": "D",
+             "het": "H",
+             "unk": "U",
+             "Mbmap": True,
+             "cm_column": 2,
+             "mb_column": 3,
+             "prgy": ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"),
+             "nprgy": 6,
+             "chromosomes": (
+                 {"name": "1",
+                  "mb_exists": True,
+                  "cm_column": 2,
+                  "mb_column": 3,
+                  "loci": (
+                      {"chr": "1",
+                       "name": "rs31443144",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 1, -1)
+                       },
+                      {"chr": "1",
+                       "name": "rs6269442",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 0, "U")},
+                      {"chr": "1",
+                       "name": "rs32285189",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, "U", 1, 1, 1, -1)})},
+                 {"name": "2",
+                  "mb_exists": True,
+                  "cm_column": 2,
+                  "mb_column": 3,
+                  "loci": (
+                      {"chr": "2",
+                       "name": "rs31443144",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 1, -1)},
+                      {"chr": "2",
+                       "name": "rs6269442",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 0, "U")})})})
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index ee98893..8af8e82 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -166,15 +166,19 @@ class TestTraitsDBFunctions(TestCase):
         the right calls.
 
         """
+        # pylint: disable=C0103
         db_mock = mock.MagicMock()
 
         STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
-        PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
-                                 "WHERE StrainId = %s AND Id = %s")
-        PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s "
-                               "WHERE StrainId = %s AND DataId = %s")
-        N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s "
-                             "WHERE StrainId = %s AND DataId = %s")
+        PUBLISH_DATA_SQL: str = (
+            "UPDATE PublishData SET value = %s "
+            "WHERE StrainId = %s AND Id = %s")
+        PUBLISH_SE_SQL: str = (
+            "UPDATE PublishSE SET error = %s "
+            "WHERE StrainId = %s AND DataId = %s")
+        N_STRAIN_SQL: str = (
+            "UPDATE NStrain SET count = %s "
+            "WHERE StrainId = %s AND DataId = %s")
 
         with db_mock.cursor() as cursor:
             type(cursor).rowcount = 1
author	zsloan	2021-10-12 18:56:34 +0000
committer	zsloan	2021-10-12 18:56:34 +0000
commit	0f396f4a1a753d449cf2975fc425d587d9350689 (patch)
tree	c9dac243dc05e5cb90ccb7f1d96fd599986bf60a /tests/unit/db
parent	976660ce9ef915426c7ce5ff9077b439e4102a2c (diff)
parent	77c274b79c3ec01de60e90db3299763cb58f715b (diff)
download	genenetwork3-0f396f4a1a753d449cf2975fc425d587d9350689.tar.gz