From 221c773daea839ecf0e50c196484bb91e3a6db33 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 1 Sep 2021 06:18:20 +0300
Subject: Implement parsing of genotype labels

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/db/genotypes.py: parse genotype labels
* tests/unit/db/test_genotypes.py: test that genotype labels are parsed
  correctly

  As part of parsing the genotype files into usable Python data structures,
  this commit adds a function to parse the label lines (beginning with "@")
  into the appropriate values.
---
 tests/unit/db/test_genotypes.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 tests/unit/db/test_genotypes.py

(limited to 'tests/unit/db')

diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
new file mode 100644
index 0000000..0264764
--- /dev/null
+++ b/tests/unit/db/test_genotypes.py
@@ -0,0 +1,17 @@
+"""Tests gn3.db.genotypes"""
+from unittest import TestCase
+from gn3.db.genotypes import parse_genotype_labels
+
+class TestGenotypes(TestCase):
+    """Tests for functions in `gn3.db.genotypes`."""
+
+    def test_parse_genotype_labels(self):
+        self.assertEqual(
+            parse_genotype_labels([
+                "@name: test_group\t", "@filler: test_filler    ",
+                "@type:test_type", "@mat:test_mat   \t", "@pat:test_pat ",
+                "@het: test_het ", "@unk: test_unk", "@other: test_other",
+                "@brrr: test_brrr "]),
+        (("group", "test_group"), ("filler", "test_filler"),
+         ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
+         ("het", "test_het"), ("unk", "test_unk")))
-- 
cgit v1.2.3


From b975e0cfd1d0adc5f51e66292d29d4651d3f053f Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 1 Sep 2021 07:35:40 +0300
Subject: Parse the genotype file's data header

* gn3/db/genotypes.py: parse data header
* tests/unit/db/test_genotypes.py: check that the header is parsed correctly.

  Add tests to check that the parser works as expected. Add code to implement
  the parsing and pass the tests.
---
 gn3/db/genotypes.py             | 19 +++++++++++++++++++
 tests/unit/db/test_genotypes.py | 22 +++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)

(limited to 'tests/unit/db')

diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 2be3e1a..be0dfc2 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -87,3 +87,22 @@ def parse_genotype_labels(lines: list):
     return tuple(
         item for item in (__parse_label(line) for line in lines)
         if item is not None)
+
+def parse_genotype_header(line: str, parlist = tuple()):
+    """
+    Parse the genotype file header line
+
+    DESCRIPTION:
+    Reworks
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114
+    """
+    items = [item.strip() for item in line.split("\t")]
+    Mbmap = "Mb" in items
+    prgy = ((parlist + tuple(items[4:])) if Mbmap
+            else (parlist + tuple(items[3:])))
+    return (
+        ("Mbmap", Mbmap),
+        ("cm_column", items.index("cM")),
+        ("mb_column", None if not Mbmap else items.index("Mb")),
+        ("prgy", prgy),
+        ("nprgy", len(prgy)))
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
index 0264764..4fa8a53 100644
--- a/tests/unit/db/test_genotypes.py
+++ b/tests/unit/db/test_genotypes.py
@@ -1,6 +1,6 @@
 """Tests gn3.db.genotypes"""
 from unittest import TestCase
-from gn3.db.genotypes import parse_genotype_labels
+from gn3.db.genotypes import parse_genotype_labels, parse_genotype_header
 
 class TestGenotypes(TestCase):
     """Tests for functions in `gn3.db.genotypes`."""
@@ -15,3 +15,23 @@ class TestGenotypes(TestCase):
         (("group", "test_group"), ("filler", "test_filler"),
          ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
          ("het", "test_het"), ("unk", "test_unk")))
+
+    def test_parse_genotype_header(self):
+        for header, expected in [
+                [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t"
+                  "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"),
+                 (("Mbmap", True), ("cm_column", 2), ("mb_column", 3),
+                  ("prgy",
+                   ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
+                    "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18",
+                    "BXD19")),
+                  ("nprgy", 14))],
+                [("Chr\tLocus\tcM\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\tBXD11"
+                  "\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18"),
+                 (("Mbmap", False), ("cm_column", 2), ("mb_column", None),
+                  ("prgy",
+                   ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9", "BXD11",
+                    "BXD12", "BXD13", "BXD14", "BXD15", "BXD16", "BXD18")),
+                  ("nprgy", 13))]]:
+            with self.subTest(header=header):
+                self.assertEqual(parse_genotype_header(header), expected)
-- 
cgit v1.2.3


From a1c217cf277feda3815a8435d6c8909f1b5546a1 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 1 Sep 2021 09:11:17 +0300
Subject: Parse data lines into markers

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/db/genotypes.py: parse data lines in file to genetic markers.
* tests/unit/db/test_genotypes.py: test that parsing works.

  Add some tests to check that the parsing of the markers works as expected,
  and add the code to actually parse the markers.
---
 gn3/db/genotypes.py             | 37 +++++++++++++++++++++++++++++++++++++
 tests/unit/db/test_genotypes.py | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'tests/unit/db')

diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index be0dfc2..8710d2e 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -106,3 +106,40 @@ def parse_genotype_header(line: str, parlist = tuple()):
         ("mb_column", None if not Mbmap else items.index("Mb")),
         ("prgy", prgy),
         ("nprgy", len(prgy)))
+
+def parse_genotype_data_line(line: str, geno_obj: dict, parlist: list):
+    """
+    Parse a data line in a genotype file
+
+    DESCRIPTION:
+    Reworks
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L143-L190
+    """
+    marker_row = [item.strip() for item in line.split("\t")]
+    geno_table = {
+        geno_obj["mat"]: -1, geno_obj["pat"]: 1, geno_obj["het"]: 0,
+        geno_obj["unk"]: "U"
+    }
+    start_pos = 4 if geno_obj["Mbmap"] else 3
+    if len(parlist) > 0:
+        start_pos = start_pos + 2
+
+    alleles = marker_row[start_pos:]
+    genotype = tuple(
+        (geno_table[allele] if allele in geno_table.keys() else "U")
+        for allele in alleles)
+    if len(parlist) > 0:
+        genotype = (-1, 1) + genotype
+    try:
+        cM = float(geno_obj["cm_column"])
+    except:
+        if geno_obj["Mbmap"]:
+            cM = float(geno_obj["mb_column"])
+        else:
+            cM = 0
+    return (
+        ("chr", marker_row[0]),
+        ("name", marker_row[1]),
+        ("cM", cM),
+        ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None),
+        ("genotype", genotype))
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
index 4fa8a53..ba90191 100644
--- a/tests/unit/db/test_genotypes.py
+++ b/tests/unit/db/test_genotypes.py
@@ -1,11 +1,13 @@
 """Tests gn3.db.genotypes"""
 from unittest import TestCase
-from gn3.db.genotypes import parse_genotype_labels, parse_genotype_header
+from gn3.db.genotypes import (
+    parse_genotype_labels, parse_genotype_header, parse_genotype_data_line)
 
 class TestGenotypes(TestCase):
     """Tests for functions in `gn3.db.genotypes`."""
 
     def test_parse_genotype_labels(self):
+        """Test that the genotype labels are parsed correctly."""
         self.assertEqual(
             parse_genotype_labels([
                 "@name: test_group\t", "@filler: test_filler    ",
@@ -17,6 +19,7 @@ class TestGenotypes(TestCase):
          ("het", "test_het"), ("unk", "test_unk")))
 
     def test_parse_genotype_header(self):
+        """Test that the genotype header is parsed correctly."""
         for header, expected in [
                 [("Chr\tLocus\tcM\tMb\tBXD1\tBXD2\tBXD5\tBXD6\tBXD8\tBXD9\t"
                   "BXD11\tBXD12\tBXD13\tBXD14\tBXD15\tBXD16\tBXD18\tBXD19"),
@@ -35,3 +38,36 @@ class TestGenotypes(TestCase):
                   ("nprgy", 13))]]:
             with self.subTest(header=header):
                 self.assertEqual(parse_genotype_header(header), expected)
+
+    def test_parse_genotype_data_line(self):
+        """Test parsing of data lines."""
+        for line, geno_obj, parlist, expected in [
+                ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB",
+                 {"mat": "test_mat", "pat": "test_pat", "het": "test_het",
+                  "unk": "test_unk", "cm_column": 2, "Mbmap": True,
+                  "mb_column": 3},
+                 tuple(),
+                 (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                  ("Mb", 3.0),
+                  ("genotype",
+                   ("U", "U", "U", "U", "U", "U", "U", "U", "U", "U")))],
+                ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tD\tD\tB\tB\tD\tB\tB",
+                 {"mat": "test_mat", "pat": "test_pat", "het": "test_het",
+                  "unk": "test_unk", "cm_column": 2, "Mbmap": True,
+                  "mb_column": 3},
+                 ("some", "parlist", "content"),
+                 (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                  ("Mb", 3.0),
+                  ("genotype",
+                   (-1, 1, "U", "U", "U", "U", "U", "U", "U", "U")))],
+                ["1\trs31443144\t1.50\t3.010274\tB\tB\tD\tH\tD\tB\tU\tD\tB\tB",
+                 {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+                  "cm_column": 2, "Mbmap": True, "mb_column": 3},
+                 tuple(),
+                 (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                  ("Mb", 3.0),
+                  ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]:
+            with self.subTest(line = line):
+                self.assertEqual(
+                    parse_genotype_data_line(line, geno_obj, parlist),
+                    expected)
-- 
cgit v1.2.3


From abfc0410a2385d8c3d6ee1915fc99b708e1d0dbc Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 1 Sep 2021 10:49:52 +0300
Subject: Build top-level genotype file parsing function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* gn3/db/genotypes.py: parse genotype files
* tests/unit/db/test_genotypes.py: test parsing is correct

  Add the overall genotype files parsing function and tests to check that the
  parsing works as expected.
---
 gn3/db/genotypes.py             |  38 ++++++++++++++-
 tests/unit/db/test_genotypes.py | 101 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 136 insertions(+), 3 deletions(-)

(limited to 'tests/unit/db')

diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 8710d2e..b5d14a5 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -107,7 +107,7 @@ def parse_genotype_header(line: str, parlist = tuple()):
         ("prgy", prgy),
         ("nprgy", len(prgy)))
 
-def parse_genotype_data_line(line: str, geno_obj: dict, parlist: list):
+def parse_genotype_marker(line: str, geno_obj: dict, parlist: list):
     """
     Parse a data line in a genotype file
 
@@ -143,3 +143,39 @@ def parse_genotype_data_line(line: str, geno_obj: dict, parlist: list):
         ("cM", cM),
         ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None),
         ("genotype", genotype))
+
+def build_genotype_chromosomes(geno_obj, markers):
+    """
+    Build up the chromosomes from the given markers and partially built geno
+    object
+    """
+    mrks = [dict(marker) for marker in markers]
+    chr_names = {marker["chr"] for marker in mrks}
+    return tuple((
+        ("name", chr_name), ("mb_exists", geno_obj["Mbmap"]), ("cm_column", 2),
+        ("mb_column", geno_obj["mb_column"]),
+        ("loci", tuple(marker for marker in mrks if marker["chr"] == chr_name)))
+           for chr_name in sorted(chr_names))
+
+def parse_genotype_file(filename: str, parlist = tuple()):
+    """
+    Parse the provided genotype file into a usable pytho3 data structure.
+    """
+    with open(filename, "r") as infile:
+        contents = infile.readlines()
+
+    lines = tuple(line for line in contents if
+             ((not line.strip().startswith("#")) and
+              (not line.strip() == "")))
+    labels = parse_genotype_labels(
+        line for line in lines if line.startswith("@"))
+    data_lines = tuple(line for line in lines if not line.startswith("@"))
+    header = parse_genotype_header(data_lines[0], parlist)
+    geno_obj = dict(labels + header)
+    markers = tuple(
+        parse_genotype_marker(line, geno_obj, parlist)
+        for line in data_lines[1:])
+    chromosomes = tuple(
+        dict(chromosome) for chromosome in
+        build_genotype_chromosomes(geno_obj, markers))
+    return {**geno_obj, "chromosomes": chromosomes}
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
index ba90191..a05ce48 100644
--- a/tests/unit/db/test_genotypes.py
+++ b/tests/unit/db/test_genotypes.py
@@ -1,7 +1,11 @@
 """Tests gn3.db.genotypes"""
 from unittest import TestCase
 from gn3.db.genotypes import (
-    parse_genotype_labels, parse_genotype_header, parse_genotype_data_line)
+    parse_genotype_file,
+    parse_genotype_labels,
+    parse_genotype_header,
+    parse_genotype_marker,
+    build_genotype_chromosomes)
 
 class TestGenotypes(TestCase):
     """Tests for functions in `gn3.db.genotypes`."""
@@ -69,5 +73,98 @@ class TestGenotypes(TestCase):
                   ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]:
             with self.subTest(line = line):
                 self.assertEqual(
-                    parse_genotype_data_line(line, geno_obj, parlist),
+                    parse_genotype_marker(line, geno_obj, parlist),
                     expected)
+
+    def test_build_genotype_chromosomes(self):
+        """
+        Given `markers` and `geno_obj`, test that `build_genotype_chromosomes`
+        builds a sequence of chromosomes with the given markers ordered
+        according to the `chr` value."""
+        for markers, geno_obj, expected in [
+                [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                   ("Mb", 3.0),
+                   ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1))),
+                  (("chr", "2"), ("name", "rs31443144"), ("cM", 2.0),
+                   ("Mb", 3.0),
+                   ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))],
+                 {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+                  "cm_column": 2, "Mbmap": True, "mb_column": 3},
+                 ((("name", "1"), ("mb_exists", True), ("cm_column", 2),
+                   ("mb_column", 3),
+                   ("loci",
+                    ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": 3.0,
+                      "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))),
+                  (("name", "2"), ("mb_exists", True), ("cm_column", 2),
+                   ("mb_column", 3),
+                   ("loci",
+                    ({"chr": "2", "name": "rs31443144", "cM": 2.0, "Mb": 3.0,
+                      "genotype": (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)},))))],
+                [[(("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
+                   ("Mb", None),
+                   ("genotype", (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)))],
+                 {"mat": "B", "pat": "D", "het": "H", "unk": "U",
+                  "cm_column": 2, "Mbmap": False, "mb_column": None},
+                 ((("name", "1"), ("mb_exists", False), ("cm_column", 2),
+                   ("mb_column", None),
+                   ("loci",
+                    ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None,
+                      "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]:
+            with self.subTest(markers = markers):
+                self.assertEqual(
+                    build_genotype_chromosomes(geno_obj, markers),
+                    expected)
+
+    def test_parse_genotype_file(self):
+        """Test the parsing of genotype files. """
+        self.assertEqual(
+            parse_genotype_file(
+                "tests/unit/db/data/genotypes/genotype_sample1.geno"),
+            {"group": "BXD",
+             "type": "riset",
+             "mat": "B",
+             "pat": "D",
+             "het": "H",
+             "unk": "U",
+             "Mbmap": True,
+             "cm_column": 2,
+             "mb_column": 3,
+             "prgy": ("BXD1", "BXD2", "BXD5", "BXD6", "BXD8", "BXD9"),
+             "nprgy": 6,
+             "chromosomes": (
+                 {"name": "1",
+                  "mb_exists": True,
+                  "cm_column": 2,
+                  "mb_column": 3,
+                  "loci": (
+                      {"chr": "1",
+                       "name": "rs31443144",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 1, -1)
+                       },
+                      {"chr": "1",
+                       "name": "rs6269442",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 0, "U")},
+                      {"chr": "1",
+                       "name": "rs32285189",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, "U", 1, 1, 1, -1)})},
+                 {"name": "2",
+                  "mb_exists": True,
+                  "cm_column": 2,
+                  "mb_column": 3,
+                  "loci": (
+                      {"chr": "2",
+                       "name": "rs31443144",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 1, -1)},
+                      {"chr": "2",
+                       "name": "rs6269442",
+                       "cM": 2.0,
+                       "Mb": 3.0,
+                       "genotype": (-1, -1, 1, 1, 0, "U")})})})
-- 
cgit v1.2.3


From 3ded952f40f486d9aa69746eac2afe7f67fef790 Mon Sep 17 00:00:00 2001
From: Muriithi Frederick Muriuki
Date: Wed, 1 Sep 2021 11:08:38 +0300
Subject: Fix linting and typing issues

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi
---
 gn3/db/genotypes.py             | 32 ++++++++++++++++----------------
 tests/unit/db/test_genotypes.py | 10 +++++-----
 2 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'tests/unit/db')

diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index b5d14a5..b03d55c 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -88,7 +88,7 @@ def parse_genotype_labels(lines: list):
         item for item in (__parse_label(line) for line in lines)
         if item is not None)
 
-def parse_genotype_header(line: str, parlist = tuple()):
+def parse_genotype_header(line: str, parlist: tuple = tuple()):
     """
     Parse the genotype file header line
 
@@ -97,13 +97,13 @@ def parse_genotype_header(line: str, parlist = tuple()):
     https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/utility/gen_geno_ob.py#L94-L114
     """
     items = [item.strip() for item in line.split("\t")]
-    Mbmap = "Mb" in items
-    prgy = ((parlist + tuple(items[4:])) if Mbmap
+    mbmap = "Mb" in items
+    prgy = ((parlist + tuple(items[4:])) if mbmap
             else (parlist + tuple(items[3:])))
     return (
-        ("Mbmap", Mbmap),
+        ("Mbmap", mbmap),
         ("cm_column", items.index("cM")),
-        ("mb_column", None if not Mbmap else items.index("Mb")),
+        ("mb_column", None if not mbmap else items.index("Mb")),
         ("prgy", prgy),
         ("nprgy", len(prgy)))
 
@@ -131,16 +131,16 @@ def parse_genotype_marker(line: str, geno_obj: dict, parlist: list):
     if len(parlist) > 0:
         genotype = (-1, 1) + genotype
     try:
-        cM = float(geno_obj["cm_column"])
+        cm_val = float(geno_obj["cm_column"])
     except:
         if geno_obj["Mbmap"]:
-            cM = float(geno_obj["mb_column"])
+            cm_val = float(geno_obj["mb_column"])
         else:
-            cM = 0
+            cm_val = 0
     return (
         ("chr", marker_row[0]),
         ("name", marker_row[1]),
-        ("cM", cM),
+        ("cM", cm_val),
         ("Mb", float(geno_obj["mb_column"]) if geno_obj["Mbmap"] else None),
         ("genotype", genotype))
 
@@ -155,9 +155,9 @@ def build_genotype_chromosomes(geno_obj, markers):
         ("name", chr_name), ("mb_exists", geno_obj["Mbmap"]), ("cm_column", 2),
         ("mb_column", geno_obj["mb_column"]),
         ("loci", tuple(marker for marker in mrks if marker["chr"] == chr_name)))
-           for chr_name in sorted(chr_names))
+                 for chr_name in sorted(chr_names))
 
-def parse_genotype_file(filename: str, parlist = tuple()):
+def parse_genotype_file(filename: str, parlist: tuple = tuple()):
     """
     Parse the provided genotype file into a usable pytho3 data structure.
     """
@@ -165,16 +165,16 @@ def parse_genotype_file(filename: str, parlist = tuple()):
         contents = infile.readlines()
 
     lines = tuple(line for line in contents if
-             ((not line.strip().startswith("#")) and
-              (not line.strip() == "")))
+                  ((not line.strip().startswith("#")) and
+                   (not line.strip() == "")))
     labels = parse_genotype_labels(
-        line for line in lines if line.startswith("@"))
+        [line for line in lines if line.startswith("@")])
     data_lines = tuple(line for line in lines if not line.startswith("@"))
     header = parse_genotype_header(data_lines[0], parlist)
     geno_obj = dict(labels + header)
     markers = tuple(
-        parse_genotype_marker(line, geno_obj, parlist)
-        for line in data_lines[1:])
+        [parse_genotype_marker(line, geno_obj, parlist)
+        for line in data_lines[1:]])
     chromosomes = tuple(
         dict(chromosome) for chromosome in
         build_genotype_chromosomes(geno_obj, markers))
diff --git a/tests/unit/db/test_genotypes.py b/tests/unit/db/test_genotypes.py
index a05ce48..c125224 100644
--- a/tests/unit/db/test_genotypes.py
+++ b/tests/unit/db/test_genotypes.py
@@ -18,9 +18,9 @@ class TestGenotypes(TestCase):
                 "@type:test_type", "@mat:test_mat   \t", "@pat:test_pat ",
                 "@het: test_het ", "@unk: test_unk", "@other: test_other",
                 "@brrr: test_brrr "]),
-        (("group", "test_group"), ("filler", "test_filler"),
-         ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
-         ("het", "test_het"), ("unk", "test_unk")))
+            (("group", "test_group"), ("filler", "test_filler"),
+             ("type", "test_type"), ("mat", "test_mat"), ("pat", "test_pat"),
+             ("het", "test_het"), ("unk", "test_unk")))
 
     def test_parse_genotype_header(self):
         """Test that the genotype header is parsed correctly."""
@@ -71,7 +71,7 @@ class TestGenotypes(TestCase):
                  (("chr", "1"), ("name", "rs31443144"), ("cM", 2.0),
                   ("Mb", 3.0),
                   ("genotype", (-1, -1, 1, 0, 1, -1, "U", 1, -1, -1)))]]:
-            with self.subTest(line = line):
+            with self.subTest(line=line):
                 self.assertEqual(
                     parse_genotype_marker(line, geno_obj, parlist),
                     expected)
@@ -110,7 +110,7 @@ class TestGenotypes(TestCase):
                    ("loci",
                     ({"chr": "1", "name": "rs31443144", "cM": 2.0, "Mb": None,
                       "genotype": (-1, 1, 1, 0, 1, -1, "U", 1, -1, -1)},))),)]]:
-            with self.subTest(markers = markers):
+            with self.subTest(markers=markers):
                 self.assertEqual(
                     build_genotype_chromosomes(geno_obj, markers),
                     expected)
-- 
cgit v1.2.3


From ed2e4c0f9d68cfb720da95eba559d69359f7b5fc Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 15 Sep 2021 05:35:34 +0300
Subject: Add missing sample file for tests

* tests/unit/db/data/genotypes/genotype_sample1.geno: new file

  Add a missing sample data file needed for unit tests.
---
 tests/unit/db/data/genotypes/genotype_sample1.geno | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 tests/unit/db/data/genotypes/genotype_sample1.geno

(limited to 'tests/unit/db')

diff --git a/tests/unit/db/data/genotypes/genotype_sample1.geno b/tests/unit/db/data/genotypes/genotype_sample1.geno
new file mode 100644
index 0000000..2a55964
--- /dev/null
+++ b/tests/unit/db/data/genotypes/genotype_sample1.geno
@@ -0,0 +1,23 @@
+# File name: genotype_sample for testing
+
+# Metadata: Please retain this header information with file.
+
+
+@name: BXD
+@type: riset
+@mat:     B
+@pat: D
+@het:H
+@unk: U
+
+
+
+
+
+
+Chr	Locus	cM	Mb	BXD1	BXD2	BXD5	BXD6	BXD8	BXD9
+1	rs31443144	1.50	3.010274	B	B	D	D	D	B
+1	rs6269442	1.50	3.492195	B	B	D	D	H	Y
+1	rs32285189	1.63	3.511204	B	U	D	D	D	B
+2	rs31443144	1.50	3.010274	B	B	D	D	D	B
+2	rs6269442	1.50	3.492195	B	B	D	D	H	Y
\ No newline at end of file
-- 
cgit v1.2.3


From 56c73324c285d896567268370f3955bbd15754b0 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Sep 2021 09:02:46 +0300
Subject: Fix more pylint errors

---
 gn3/computations/qtlreaper.py | 3 ++-
 gn3/db/genotypes.py           | 2 +-
 tests/unit/db/test_traits.py  | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'tests/unit/db')

diff --git a/gn3/computations/qtlreaper.py b/gn3/computations/qtlreaper.py
index 5ddea76..8b2893e 100644
--- a/gn3/computations/qtlreaper.py
+++ b/gn3/computations/qtlreaper.py
@@ -74,7 +74,8 @@ def run_reaper(
     if separate_nperm_output:
         permu_output_filename: Union[None, str] = "{}/qtlreaper/permu_output_{}.txt".format(
             output_dir, random_string(10))
-        output_list = output_list + ["--permu_output", permu_output_filename] # type: ignore[list-item]
+        output_list = output_list + [
+            "--permu_output", permu_output_filename] # type: ignore[list-item]
     else:
         permu_output_filename = None
 
diff --git a/gn3/db/genotypes.py b/gn3/db/genotypes.py
index 9ea9f20..9987320 100644
--- a/gn3/db/genotypes.py
+++ b/gn3/db/genotypes.py
@@ -56,7 +56,7 @@ def __load_genotype_samples_from_geno(genotype_filename: str):
             continue
         break
 
-    headers = line.split("\t" ) # type: ignore[arg-type]
+    headers = line.split("\t") # type: ignore[arg-type]
     if headers[3] == "Mb":
         return headers[4:]
     return headers[3:]
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index ee98893..baa2af3 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -166,6 +166,7 @@ class TestTraitsDBFunctions(TestCase):
         the right calls.
 
         """
+        # pylint: disable=C0103
         db_mock = mock.MagicMock()
 
         STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
-- 
cgit v1.2.3


From 95c5c0e73bffbf0287a17309e703063ee54d25ba Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 23 Sep 2021 03:45:19 +0300
Subject: Refactor: Move common sample data to separate file

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Move common sample test data into a separate file where it can be imported
  from, to prevent pylint error R0801, which proved tricky to silence any
  other way.
---
 tests/unit/computations/test_qtlreaper.py |  68 ++++--------------
 tests/unit/db/test_traits.py              |  15 ++--
 tests/unit/sample_test_data.py            | 111 ++++++++++++++++++++++++++++++
 tests/unit/test_heatmaps.py               |  96 +-------------------------
 4 files changed, 134 insertions(+), 156 deletions(-)
 create mode 100644 tests/unit/sample_test_data.py

(limited to 'tests/unit/db')

diff --git a/tests/unit/computations/test_qtlreaper.py b/tests/unit/computations/test_qtlreaper.py
index d420470..742d106 100644
--- a/tests/unit/computations/test_qtlreaper.py
+++ b/tests/unit/computations/test_qtlreaper.py
@@ -4,6 +4,7 @@ from gn3.computations.qtlreaper import (
     parse_reaper_main_results,
     organise_reaper_main_results,
     parse_reaper_permutation_results)
+from tests.unit.sample_test_data import organised_trait_1
 
 class TestQTLReaper(TestCase):
     """Class for testing qtlreaper interface functions."""
@@ -81,99 +82,54 @@ class TestQTLReaper(TestCase):
         self.assertEqual(
             organise_reaper_main_results([
                 {
-                    "ID": "T1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
+                    "ID": "1", "Locus": "rs31443144", "Chr": 1, "cM": 1.500,
                     "Mb": 3.010, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
+                    "ID": "1", "Locus": "rs6269442", "Chr": 1, "cM": 1.500,
                     "Mb": 3.492, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
+                    "ID": "1", "Locus": "rs32285189", "Chr": 1, "cM": 1.630,
                     "Mb": 3.511, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
+                    "ID": "1", "Locus": "rs258367496", "Chr": 1, "cM": 1.630,
                     "Mb": 3.660, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
+                    "ID": "1", "Locus": "rs32430919", "Chr": 1, "cM": 1.750,
                     "Mb": 3.777, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
+                    "ID": "1", "Locus": "rs36251697", "Chr": 1, "cM": 1.880,
                     "Mb": 3.812, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
+                    "ID": "1", "Locus": "rs30658298", "Chr": 1, "cM": 2.010,
                     "Mb": 4.431, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
+                    "ID": "1", "Locus": "rs51852623", "Chr": 2, "cM": 2.010,
                     "Mb": 4.447, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
+                    "ID": "1", "Locus": "rs31879829", "Chr": 2, "cM": 2.140,
                     "Mb": 4.519, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 },
                 {
-                    "ID": "T1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
+                    "ID": "1", "Locus": "rs36742481", "Chr": 2, "cM": 2.140,
                     "Mb": 4.776, "LRS": 0.500, "Additive": -0.074,
                     "pValue": 1.000
                 }
             ]),
-            {"T1": {"ID": "T1",
-                    "chromosomes": {
-                        1: {"Chr": 1,
-                            "loci": [
-                                {
-                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]},
-                        2: {"Chr": 2,
-                            "loci": [
-                                {
-                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]}}}})
+            organised_trait_1)
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index baa2af3..8af8e82 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -170,12 +170,15 @@ class TestTraitsDBFunctions(TestCase):
         db_mock = mock.MagicMock()
 
         STRAIN_ID_SQL: str = "UPDATE Strain SET Name = %s WHERE Id = %s"
-        PUBLISH_DATA_SQL: str = ("UPDATE PublishData SET value = %s "
-                                 "WHERE StrainId = %s AND Id = %s")
-        PUBLISH_SE_SQL: str = ("UPDATE PublishSE SET error = %s "
-                               "WHERE StrainId = %s AND DataId = %s")
-        N_STRAIN_SQL: str = ("UPDATE NStrain SET count = %s "
-                             "WHERE StrainId = %s AND DataId = %s")
+        PUBLISH_DATA_SQL: str = (
+            "UPDATE PublishData SET value = %s "
+            "WHERE StrainId = %s AND Id = %s")
+        PUBLISH_SE_SQL: str = (
+            "UPDATE PublishSE SET error = %s "
+            "WHERE StrainId = %s AND DataId = %s")
+        N_STRAIN_SQL: str = (
+            "UPDATE NStrain SET count = %s "
+            "WHERE StrainId = %s AND DataId = %s")
 
         with db_mock.cursor() as cursor:
             type(cursor).rowcount = 1
diff --git a/tests/unit/sample_test_data.py b/tests/unit/sample_test_data.py
new file mode 100644
index 0000000..407d074
--- /dev/null
+++ b/tests/unit/sample_test_data.py
@@ -0,0 +1,111 @@
+"""
+This module holds a collection of sample data variables that are used in more
+than one test.
+
+This is mostly to avoid the `duplicate-code` pylint error that gets raised if
+the same data is defined in more than one file. Adding either
+`# pylint: disable=R0801` or `# pylint: disable=duplicate-code` to the top of
+the file does not seem to work as expected.
+
+Adding these same declarations to .pylintrc is not an option, since that,
+seemingly, would deactivate the warnings for all code in the project: We do not
+want that.
+"""
+
+organised_trait_1 = {
+    "1": {
+        "ID": "1",
+        "chromosomes": {
+            1: {"Chr": 1,
+                "loci": [
+                    {
+                        "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    }]},
+            2: {"Chr": 2,
+                "loci": [
+                    {
+                        "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    }]}}}}
+
+organised_trait_2 = {
+    "2": {
+        "ID": "2",
+        "chromosomes": {
+            1: {"Chr": 1,
+                "loci": [
+                    {
+                        "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    }]},
+            2: {"Chr": 2,
+                "loci": [
+                    {
+                        "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
+                        "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
+                    },
+                    {
+                        "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
+                        "LRS": 0.579, "Additive": -0.074, "pValue": 1.000
+                    }]}}}}
diff --git a/tests/unit/test_heatmaps.py b/tests/unit/test_heatmaps.py
index c0a496b..fd91cf9 100644
--- a/tests/unit/test_heatmaps.py
+++ b/tests/unit/test_heatmaps.py
@@ -7,6 +7,7 @@ from gn3.heatmaps import (
     compute_traits_order,
     retrieve_strains_and_values,
     process_traits_data_for_heatmap)
+from tests.unit.sample_test_data import organised_trait_1, organised_trait_2
 
 strainlist = ["B6cC3-1", "BXD1", "BXD12", "BXD16", "BXD19", "BXD2"]
 trait_data = {
@@ -206,100 +207,7 @@ class TestHeatmap(TestCase):
         """Check for correct processing of data for heatmap generation."""
         self.assertEqual(
             process_traits_data_for_heatmap(
-                {"1": {
-                    "ID": "T1",
-                    "chromosomes": {
-                        1: {"Chr": 1,
-                            "loci": [
-                                {
-                                    "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]},
-                        2: {"Chr": 2,
-                            "loci": [
-                                {
-                                    "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                },
-                                {
-                                    "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                                    "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                }]}}},
-                 "2": {
-                     "ID": "T1",
-                     "chromosomes": {
-                         1: {"Chr": 1,
-                             "loci": [
-                                 {
-                                     "Locus": "rs31443144", "cM": 1.500, "Mb": 3.010,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs6269442", "cM": 1.500, "Mb": 3.492,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs32285189", "cM": 1.630, "Mb": 3.511,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs258367496", "cM": 1.630, "Mb": 3.660,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs32430919", "cM": 1.750, "Mb": 3.777,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs36251697", "cM": 1.880, "Mb": 3.812,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs30658298", "cM": 2.010, "Mb": 4.431,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 }]},
-                         2: {"Chr": 2,
-                             "loci": [
-                                 {
-                                     "Locus": "rs51852623", "cM": 2.010, "Mb": 4.447,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs31879829", "cM": 2.140, "Mb": 4.519,
-                                     "LRS": 0.500, "Additive": -0.074, "pValue": 1.000
-                                 },
-                                 {
-                                     "Locus": "rs36742481", "cM": 2.140, "Mb": 4.776,
-                                     "LRS": 0.579, "Additive": -0.074, "pValue": 1.000
-                                 }]}}}},
+                {**organised_trait_1, **organised_trait_2},
                 ["2", "1"],
                 [1, 2]),
             [[[0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
-- 
cgit v1.2.3


From 1d09a9222f8c661da3abd6d61c09ae19eeb5d793 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 27 Sep 2021 05:02:09 +0300
Subject: Update terminology: `riset` to `group`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/clustering.gmi

* Update terminology to use the appropriate domain terminology according to
  Zachary's direction at
  https://github.com/genenetwork/genenetwork3/pull/37#issuecomment-926041744
---
 gn3/db/datasets.py             | 52 +++++++++++++++++++++---------------------
 gn3/db/traits.py               | 16 ++++++-------
 gn3/heatmaps.py                |  2 +-
 tests/unit/db/test_datasets.py | 42 +++++++++++++++++-----------------
 4 files changed, 56 insertions(+), 56 deletions(-)

(limited to 'tests/unit/db')

diff --git a/gn3/db/datasets.py b/gn3/db/datasets.py
index 4a05499..6c328f5 100644
--- a/gn3/db/datasets.py
+++ b/gn3/db/datasets.py
@@ -119,9 +119,9 @@ def retrieve_dataset_name(
     return fn_map[trait_type](threshold, dataset_name, conn)
 
 
-def retrieve_geno_riset_fields(name, conn):
+def retrieve_geno_group_fields(name, conn):
     """
-    Retrieve the RISet, and RISetID values for various Geno trait types.
+    Retrieve the Group, and GroupID values for various Geno trait types.
     """
     query = (
         "SELECT InbredSet.Name, InbredSet.Id "
@@ -130,12 +130,12 @@ def retrieve_geno_riset_fields(name, conn):
         "AND GenoFreeze.Name = %(name)s")
     with conn.cursor() as cursor:
         cursor.execute(query, {"name": name})
-        return dict(zip(["riset", "risetid"], cursor.fetchone()))
+        return dict(zip(["group", "groupid"], cursor.fetchone()))
     return {}
 
-def retrieve_publish_riset_fields(name, conn):
+def retrieve_publish_group_fields(name, conn):
     """
-    Retrieve the RISet, and RISetID values for various Publish trait types.
+    Retrieve the Group, and GroupID values for various Publish trait types.
     """
     query = (
         "SELECT InbredSet.Name, InbredSet.Id "
@@ -144,12 +144,12 @@ def retrieve_publish_riset_fields(name, conn):
         "AND PublishFreeze.Name = %(name)s")
     with conn.cursor() as cursor:
         cursor.execute(query, {"name": name})
-        return dict(zip(["riset", "risetid"], cursor.fetchone()))
+        return dict(zip(["group", "groupid"], cursor.fetchone()))
     return {}
 
-def retrieve_probeset_riset_fields(name, conn):
+def retrieve_probeset_group_fields(name, conn):
     """
-    Retrieve the RISet, and RISetID values for various ProbeSet trait types.
+    Retrieve the Group, and GroupID values for various ProbeSet trait types.
     """
     query = (
         "SELECT InbredSet.Name, InbredSet.Id "
@@ -159,12 +159,12 @@ def retrieve_probeset_riset_fields(name, conn):
         "AND ProbeSetFreeze.Name = %(name)s")
     with conn.cursor() as cursor:
         cursor.execute(query, {"name": name})
-        return dict(zip(["riset", "risetid"], cursor.fetchone()))
+        return dict(zip(["group", "groupid"], cursor.fetchone()))
     return {}
 
-def retrieve_temp_riset_fields(name, conn):
+def retrieve_temp_group_fields(name, conn):
     """
-    Retrieve the RISet, and RISetID values for `Temp` trait types.
+    Retrieve the Group, and GroupID values for `Temp` trait types.
     """
     query = (
         "SELECT InbredSet.Name, InbredSet.Id "
@@ -173,30 +173,30 @@ def retrieve_temp_riset_fields(name, conn):
         "AND Temp.Name = %(name)s")
     with conn.cursor() as cursor:
         cursor.execute(query, {"name": name})
-        return dict(zip(["riset", "risetid"], cursor.fetchone()))
+        return dict(zip(["group", "groupid"], cursor.fetchone()))
     return {}
 
-def retrieve_riset_fields(trait_type, trait_name, dataset_info, conn):
+def retrieve_group_fields(trait_type, trait_name, dataset_info, conn):
     """
-    Retrieve the RISet, and RISetID values for various trait types.
+    Retrieve the Group, and GroupID values for various trait types.
     """
-    riset_fns_map = {
-        "Geno": retrieve_geno_riset_fields,
-        "Publish": retrieve_publish_riset_fields,
-        "ProbeSet": retrieve_probeset_riset_fields
+    group_fns_map = {
+        "Geno": retrieve_geno_group_fields,
+        "Publish": retrieve_publish_group_fields,
+        "ProbeSet": retrieve_probeset_group_fields
     }
 
     if trait_type == "Temp":
-        riset_info = retrieve_temp_riset_fields(trait_name, conn)
+        group_info = retrieve_temp_group_fields(trait_name, conn)
     else:
-        riset_info = riset_fns_map[trait_type](dataset_info["dataset_name"], conn)
+        group_info = group_fns_map[trait_type](dataset_info["dataset_name"], conn)
 
     return {
         **dataset_info,
-        **riset_info,
-        "riset": (
-            "BXD" if riset_info.get("riset") == "BXD300"
-            else riset_info.get("riset", ""))
+        **group_info,
+        "group": (
+            "BXD" if group_info.get("group") == "BXD300"
+            else group_info.get("group", ""))
     }
 
 def retrieve_temp_trait_dataset():
@@ -281,11 +281,11 @@ def retrieve_trait_dataset(trait_type, trait, threshold, conn):
             trait_type, threshold, trait["trait_name"],
             trait["db"]["dataset_name"], conn)
     }
-    riset = retrieve_riset_fields(
+    group = retrieve_group_fields(
         trait_type, trait["trait_name"], dataset_name_info, conn)
     return {
         "display_name": dataset_name_info["dataset_name"],
         **dataset_name_info,
         **dataset_fns[trait_type](),
-        **riset
+        **group
     }
diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index c9d05d7..f2673c8 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -226,7 +226,7 @@ def set_homologene_id_field_probeset(trait_info, conn):
     """
     query = (
         "SELECT HomologeneId FROM Homologene, Species, InbredSet"
-        " WHERE Homologene.GeneId = %(geneid)s AND InbredSet.Name = %(riset)s"
+        " WHERE Homologene.GeneId = %(geneid)s AND InbredSet.Name = %(group)s"
         " AND InbredSet.SpeciesId = Species.Id AND"
         " Species.TaxonomyId = Homologene.TaxonomyId")
     with conn.cursor() as cursor:
@@ -234,7 +234,7 @@ def set_homologene_id_field_probeset(trait_info, conn):
             query,
             {
                 k:v for k, v in trait_info.items()
-                if k in ["geneid", "riset"]
+                if k in ["geneid", "group"]
             })
         res = cursor.fetchone()
         if res:
@@ -422,7 +422,7 @@ def retrieve_trait_info(
     if trait_info["haveinfo"]:
         return {
             **trait_post_processing_functions_table[trait_dataset_type](
-                {**trait_info, "riset": trait_dataset["riset"]}),
+                {**trait_info, "group": trait_dataset["group"]}),
             "db": {**trait["db"], **trait_dataset}
         }
     return trait_info
@@ -449,14 +449,14 @@ def retrieve_temp_trait_data(trait_info: dict, conn: Any):
                 for row in cursor.fetchall()]
     return []
 
-def retrieve_species_id(riset, conn: Any):
+def retrieve_species_id(group, conn: Any):
     """
-    Retrieve a species id given the RISet value
+    Retrieve a species id given the Group value
     """
     with conn.cursor as cursor:
         cursor.execute(
-            "SELECT SpeciesId from InbredSet WHERE Name = %(riset)s",
-            {"riset": riset})
+            "SELECT SpeciesId from InbredSet WHERE Name = %(group)s",
+            {"group": group})
         return cursor.fetchone()[0]
     return None
 
@@ -482,7 +482,7 @@ def retrieve_geno_trait_data(trait_info: Dict, conn: Any):
             {"trait_name": trait_info["trait_name"],
              "dataset_name": trait_info["db"]["dataset_name"],
              "species_id": retrieve_species_id(
-                 trait_info["db"]["riset"], conn)})
+                 trait_info["db"]["group"], conn)})
         return [dict(zip(
             ["sample_name", "value", "se_error", "id"], row))
                 for row in cursor.fetchall()]
diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py
index b6fc6d3..a36940d 100644
--- a/gn3/heatmaps.py
+++ b/gn3/heatmaps.py
@@ -164,7 +164,7 @@ def build_heatmap(traits_names, conn: Any):
         retrieve_trait_info(threshold, fullname, conn)
         for fullname in traits_names]
     traits_data_list = [retrieve_trait_data(t, conn) for t in traits]
-    genotype_filename = build_genotype_file(traits[0]["riset"])
+    genotype_filename = build_genotype_file(traits[0]["group"])
     samples = load_genotype_samples(genotype_filename)
     exported_traits_data_list = [
         export_trait_data(td, samples) for td in traits_data_list]
diff --git a/tests/unit/db/test_datasets.py b/tests/unit/db/test_datasets.py
index 38de0e2..39f4af9 100644
--- a/tests/unit/db/test_datasets.py
+++ b/tests/unit/db/test_datasets.py
@@ -3,10 +3,10 @@
 from unittest import mock, TestCase
 from gn3.db.datasets import (
     retrieve_dataset_name,
-    retrieve_riset_fields,
-    retrieve_geno_riset_fields,
-    retrieve_publish_riset_fields,
-    retrieve_probeset_riset_fields)
+    retrieve_group_fields,
+    retrieve_geno_group_fields,
+    retrieve_publish_group_fields,
+    retrieve_probeset_group_fields)
 
 class TestDatasetsDBFunctions(TestCase):
     """Test cases for datasets functions."""
@@ -40,9 +40,9 @@ class TestDatasetsDBFunctions(TestCase):
                             table=table, cols=columns),
                         {"threshold": thresh, "name": dataset_name})
 
-    def test_retrieve_probeset_riset_fields(self):
+    def test_retrieve_probeset_group_fields(self):
         """
-        Test that the `riset` and `riset_id` fields are retrieved appropriately
+        Test that the `group` and `groupid` fields are retrieved appropriately
         for the 'ProbeSet' trait type.
         """
         for trait_name, expected in [
@@ -52,7 +52,7 @@ class TestDatasetsDBFunctions(TestCase):
                 with db_mock.cursor() as cursor:
                     cursor.execute.return_value = ()
                     self.assertEqual(
-                        retrieve_probeset_riset_fields(trait_name, db_mock),
+                        retrieve_probeset_group_fields(trait_name, db_mock),
                         expected)
                     cursor.execute.assert_called_once_with(
                         (
@@ -63,34 +63,34 @@ class TestDatasetsDBFunctions(TestCase):
                             " AND ProbeSetFreeze.Name = %(name)s"),
                         {"name": trait_name})
 
-    def test_retrieve_riset_fields(self):
+    def test_retrieve_group_fields(self):
         """
-        Test that the riset fields are set up correctly for the different trait
+        Test that the group fields are set up correctly for the different trait
         types.
         """
         for trait_type, trait_name, dataset_info, expected in [
                 ["Publish", "pubTraitName01", {"dataset_name": "pubDBName01"},
-                 {"dataset_name": "pubDBName01", "riset": ""}],
+                 {"dataset_name": "pubDBName01", "group": ""}],
                 ["ProbeSet", "prbTraitName01", {"dataset_name": "prbDBName01"},
-                 {"dataset_name": "prbDBName01", "riset": ""}],
+                 {"dataset_name": "prbDBName01", "group": ""}],
                 ["Geno", "genoTraitName01", {"dataset_name": "genoDBName01"},
-                 {"dataset_name": "genoDBName01", "riset": ""}],
-                ["Temp", "tempTraitName01", {}, {"riset": ""}],
+                 {"dataset_name": "genoDBName01", "group": ""}],
+                ["Temp", "tempTraitName01", {}, {"group": ""}],
                 ]:
             db_mock = mock.MagicMock()
             with self.subTest(
                     trait_type=trait_type, trait_name=trait_name,
                     dataset_info=dataset_info):
                 with db_mock.cursor() as cursor:
-                    cursor.execute.return_value = ("riset_name", 0)
+                    cursor.execute.return_value = ("group_name", 0)
                     self.assertEqual(
-                        retrieve_riset_fields(
+                        retrieve_group_fields(
                             trait_type, trait_name, dataset_info, db_mock),
                         expected)
 
-    def test_retrieve_publish_riset_fields(self):
+    def test_retrieve_publish_group_fields(self):
         """
-        Test that the `riset` and `riset_id` fields are retrieved appropriately
+        Test that the `group` and `groupid` fields are retrieved appropriately
         for the 'Publish' trait type.
         """
         for trait_name, expected in [
@@ -100,7 +100,7 @@ class TestDatasetsDBFunctions(TestCase):
                 with db_mock.cursor() as cursor:
                     cursor.execute.return_value = ()
                     self.assertEqual(
-                        retrieve_publish_riset_fields(trait_name, db_mock),
+                        retrieve_publish_group_fields(trait_name, db_mock),
                         expected)
                     cursor.execute.assert_called_once_with(
                         (
@@ -110,9 +110,9 @@ class TestDatasetsDBFunctions(TestCase):
                             " AND PublishFreeze.Name = %(name)s"),
                         {"name": trait_name})
 
-    def test_retrieve_geno_riset_fields(self):
+    def test_retrieve_geno_group_fields(self):
         """
-        Test that the `riset` and `riset_id` fields are retrieved appropriately
+        Test that the `group` and `groupid` fields are retrieved appropriately
         for the 'Geno' trait type.
         """
         for trait_name, expected in [
@@ -122,7 +122,7 @@ class TestDatasetsDBFunctions(TestCase):
                 with db_mock.cursor() as cursor:
                     cursor.execute.return_value = ()
                     self.assertEqual(
-                        retrieve_geno_riset_fields(trait_name, db_mock),
+                        retrieve_geno_group_fields(trait_name, db_mock),
                         expected)
                     cursor.execute.assert_called_once_with(
                         (
-- 
cgit v1.2.3