From 157df453cdb84591cb44af9f1d2677cd0b2c0380 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 12:17:11 +0300
Subject: Move 'export_trait_data' to 'gn3.db.traits' module

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/db/traits.py: Move function `export_trait_data` here
* gn3/heatmaps.py: Remove function `export_trait_data`
* tests/unit/db/test_traits.py: Move function `export_trait_data` tests here
* tests/unit/test_heatmaps.py: Remove function `export_trait_data` here

  Function `export_trait_data` more closely corresponds to the traits and is
  used in more than just the `gn3.heatmaps` module. This commit moves the
  relevant code over to the `gn3.db.traits` module and also moves the tests to
  the corresponding tests modules.
---
 gn3/db/traits.py | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

(limited to 'gn3/db')

diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index f2673c8..1e29aff 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -1,12 +1,81 @@
 """This class contains functions relating to trait data manipulation"""
 import os
+from functools import reduce
 from typing import Any, Dict, Union, Sequence
+
 from gn3.settings import TMPDIR
 from gn3.random import random_string
 from gn3.function_helpers import compose
 from gn3.db.datasets import retrieve_trait_dataset
 
 
+def export_trait_data(
+        trait_data: dict, samplelist: Sequence[str], dtype: str = "val",
+        var_exists: bool = False, n_exists: bool = False):
+    """
+    Export data according to `samplelist`. Mostly used in calculating
+    correlations.
+
+    DESCRIPTION:
+    Migrated from
+    https://github.com/genenetwork/genenetwork1/blob/master/web/webqtl/base/webqtlTrait.py#L166-L211
+
+    PARAMETERS
+    trait: (dict)
+      The dictionary of key-value pairs representing a trait
+    samplelist: (list)
+      A list of sample names
+    dtype: (str)
+      ... verify what this is ...
+    var_exists: (bool)
+      A flag indicating existence of variance
+    n_exists: (bool)
+      A flag indicating existence of ndata
+    """
+    def __export_all_types(tdata, sample):
+        sample_data = []
+        if tdata[sample]["value"]:
+            sample_data.append(tdata[sample]["value"])
+            if var_exists:
+                if tdata[sample]["variance"]:
+                    sample_data.append(tdata[sample]["variance"])
+                else:
+                    sample_data.append(None)
+            if n_exists:
+                if tdata[sample]["ndata"]:
+                    sample_data.append(tdata[sample]["ndata"])
+                else:
+                    sample_data.append(None)
+        else:
+            if var_exists and n_exists:
+                sample_data += [None, None, None]
+            elif var_exists or n_exists:
+                sample_data += [None, None]
+            else:
+                sample_data.append(None)
+
+        return tuple(sample_data)
+
+    def __exporter(accumulator, sample):
+        # pylint: disable=[R0911]
+        if sample in trait_data["data"]:
+            if dtype == "val":
+                return accumulator + (trait_data["data"][sample]["value"], )
+            if dtype == "var":
+                return accumulator + (trait_data["data"][sample]["variance"], )
+            if dtype == "N":
+                return accumulator + (trait_data["data"][sample]["ndata"], )
+            if dtype == "all":
+                return accumulator + __export_all_types(trait_data["data"], sample)
+            raise KeyError("Type `%s` is incorrect" % dtype)
+        if var_exists and n_exists:
+            return accumulator + (None, None, None)
+        if var_exists or n_exists:
+            return accumulator + (None, None)
+        return accumulator + (None,)
+
+    return reduce(__exporter, samplelist, tuple())
+
 def get_trait_csv_sample_data(conn: Any,
                               trait_name: int, phenotype_id: int):
     """Fetch a trait and return it as a csv string"""
-- 
cgit v1.2.3


From 94ca79045baf978d6aab964c7c70b84911c1124f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 18 Oct 2021 12:27:32 +0300
Subject: Move `export_informative` function to `gn3.db.traits` module

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/db/traits.py: Move `export_informative` function here
* gn3/partial_correlations.py: Remove `export_informative` function
* tests/unit/db/test_traits.py: Move `export_informative` function tests here
* tests/unit/test_partial_correlations.py: Remove `export_informative`
  function tests

  The `export_informative` function relates more to the traits than to the
  partial correlations, and could find use in more than just the partial
  correlations stuff. This commit moves the function to the more
  traits-specific `gn3.db.traits` module.
---
 gn3/db/traits.py                        | 24 +++++++++
 gn3/partial_correlations.py             | 24 ---------
 tests/unit/db/test_traits.py            | 86 ++++++++++++++++++++++++++++++++
 tests/unit/test_partial_correlations.py | 87 +--------------------------------
 4 files changed, 111 insertions(+), 110 deletions(-)

(limited to 'gn3/db')

diff --git a/gn3/db/traits.py b/gn3/db/traits.py
index 1e29aff..1c6aaa7 100644
--- a/gn3/db/traits.py
+++ b/gn3/db/traits.py
@@ -743,3 +743,27 @@ def generate_traits_filename(base_path: str = TMPDIR):
     """Generate a unique filename for use with generated traits files."""
     return "{}/traits_test_file_{}.txt".format(
         os.path.abspath(base_path), random_string(10))
+
+def export_informative(trait_data: dict, inc_var: bool = False) -> tuple:
+    """
+    Export informative strain
+
+    This is a migration of the `exportInformative` function in
+    web/webqtl/base/webqtlTrait.py module in GeneNetwork1.
+
+    There is a chance that the original implementation has a bug, especially
+    dealing with the `inc_var` value. It the `inc_var` value is meant to control
+    the inclusion of the `variance` value, then the current implementation, and
+    that one in GN1 have a bug.
+    """
+    def __exporter__(acc, data_item):
+        if not inc_var or data_item["variance"] is not None:
+            return (
+                acc[0] + (data_item["sample_name"],),
+                acc[1] + (data_item["value"],),
+                acc[2] + (data_item["variance"],))
+        return acc
+    return reduce(
+        __exporter__,
+        filter(lambda td: td["value"] is not None, trait_data["data"].values()),
+        (tuple(), tuple(), tuple()))
diff --git a/gn3/partial_correlations.py b/gn3/partial_correlations.py
index 8c37886..df390ed 100644
--- a/gn3/partial_correlations.py
+++ b/gn3/partial_correlations.py
@@ -6,27 +6,3 @@ GeneNetwork1.
 """
 
 from functools import reduce
-
-def export_informative(trait_data: dict, inc_var: bool = False) -> tuple:
-    """
-    Export informative strain
-
-    This is a migration of the `exportInformative` function in
-    web/webqtl/base/webqtlTrait.py module in GeneNetwork1.
-
-    There is a chance that the original implementation has a bug, especially
-    dealing with the `inc_var` value. It the `inc_var` value is meant to control
-    the inclusion of the `variance` value, then the current implementation, and
-    that one in GN1 have a bug.
-    """
-    def __exporter__(acc, data_item):
-        if not inc_var or data_item["variance"] is not None:
-            return (
-                acc[0] + (data_item["sample_name"],),
-                acc[1] + (data_item["value"],),
-                acc[2] + (data_item["variance"],))
-        return acc
-    return reduce(
-        __exporter__,
-        filter(lambda td: td["value"] is not None, trait_data["data"].values()),
-        (tuple(), tuple(), tuple()))
diff --git a/tests/unit/db/test_traits.py b/tests/unit/db/test_traits.py
index 0c4ef78..67f0c6f 100644
--- a/tests/unit/db/test_traits.py
+++ b/tests/unit/db/test_traits.py
@@ -3,6 +3,7 @@ from unittest import mock, TestCase
 from gn3.db.traits import (
     build_trait_name,
     export_trait_data,
+    export_informative,
     set_haveinfo_field,
     update_sample_data,
     retrieve_trait_info,
@@ -315,3 +316,88 @@ class TestTraitsDBFunctions(TestCase):
                         trait_data, samplelist, dtype=dtype, var_exists=vflag,
                         n_exists=nflag),
                     expected)
+
+    def test_export_informative(self):
+        """Test that the function exports appropriate data."""
+        for trait_data, inc_var, expected in [
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+                    (None, None, None, None))],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": None, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample4"), (9, 8, 6),
+                    (None, None, None))],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, True, (tuple(), tuple(), tuple())],
+                [{"data": {
+                    "sample1": {
+                        "sample_name": "sample1", "value": 9, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample2": {
+                        "sample_name": "sample2", "value": 8, "variance": 0.657,
+                        "ndata": 13
+                    },
+                    "sample3": {
+                        "sample_name": "sample3", "value": 7, "variance": None,
+                        "ndata": 13
+                    },
+                    "sample4": {
+                        "sample_name": "sample4", "value": 6, "variance": None,
+                        "ndata": 13
+                    },
+                }}, 0, (
+                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
+                    (None, 0.657, None, None))]]:
+            with self.subTest(trait_data=trait_data):
+                self.assertEqual(
+                    export_informative(trait_data, inc_var), expected)
diff --git a/tests/unit/test_partial_correlations.py b/tests/unit/test_partial_correlations.py
index 6eea078..f204d4f 100644
--- a/tests/unit/test_partial_correlations.py
+++ b/tests/unit/test_partial_correlations.py
@@ -1,92 +1,7 @@
 """Module contains tests for gn3.partial_correlations"""
 
 from unittest import TestCase
-from gn3.partial_correlations import export_informative
+
 
 class TestPartialCorrelations(TestCase):
     """Class for testing partial correlations computation functions"""
-
-    def test_export_informative(self):
-        """Test that the function exports appropriate data."""
-        for trait_data, inc_var, expected in [
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": 7, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, 0, (
-                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
-                    (None, None, None, None))],
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": None, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, 0, (
-                    ("sample1", "sample2", "sample4"), (9, 8, 6),
-                    (None, None, None))],
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": 7, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, True, (tuple(), tuple(), tuple())],
-                [{"data": {
-                    "sample1": {
-                        "sample_name": "sample1", "value": 9, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample2": {
-                        "sample_name": "sample2", "value": 8, "variance": 0.657,
-                        "ndata": 13
-                    },
-                    "sample3": {
-                        "sample_name": "sample3", "value": 7, "variance": None,
-                        "ndata": 13
-                    },
-                    "sample4": {
-                        "sample_name": "sample4", "value": 6, "variance": None,
-                        "ndata": 13
-                    },
-                }}, 0, (
-                    ("sample1", "sample2", "sample3", "sample4"), (9, 8, 7, 6),
-                    (None, 0.657, None, None))]]:
-            with self.subTest(trait_data=trait_data):
-                self.assertEqual(
-                    export_informative(trait_data, inc_var), expected)
-- 
cgit v1.2.3


From 41936d0a486ef54bf4fc049c2b4d85dca43ab761 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 21 Oct 2021 09:36:36 +0300
Subject: Implement `translate_to_mouse_gene_id` function

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Migrate the `web.webqtl.correlation/CorrelationPage.translateToMouseGeneID`
  function in GN1 to GN3.
  This is a function that retrieves data from the database, and therefore uses
  a system outside of our code, therefore, the function does not have a
  corresponding unit test.

  This kind of function will probably need to be tested at the integration or
  system tests level, where we test that our code interacts correcly with any
  and all external systems that it should.
---
 gn3/db/species.py | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'gn3/db')

diff --git a/gn3/db/species.py b/gn3/db/species.py
index 0deae4e..1e5015f 100644
--- a/gn3/db/species.py
+++ b/gn3/db/species.py
@@ -30,3 +30,34 @@ def get_chromosome(name: str, is_species: bool, conn: Any) -> Optional[Tuple]:
     with conn.cursor() as cursor:
         cursor.execute(_sql)
         return cursor.fetchall()
+
+def translate_to_mouse_gene_id(species: str, geneid: int, conn: Any) -> int:
+    """
+    Translate rat or human geneid to mouse geneid
+
+    This is a migration of the
+    `web.webqtl.correlation/CorrelationPage.translateToMouseGeneID` function in
+    GN1
+    """
+    assert species in ("rat", "mouse", "human"), "Invalid species"
+    if geneid is None:
+        return 0
+
+    if species == "mouse":
+        return geneid
+
+    with conn.cursor as cursor:
+        if species == "rat":
+            cursor.execute(
+                "SELECT mouse FROM GeneIDXRef WHERE rat = %s", geneid)
+            rat_geneid = cursor.fetchone()
+            if rat_geneid:
+                return rat_geneid[0]
+
+        cursor.execute(
+            "SELECT mouse FROM GeneIDXRef WHERE human = %s", geneid)
+        human_geneid = cursor.fetchone()
+        if human_geneid:
+            return human_geneid[0]
+
+    return 0 # default if all else fails
-- 
cgit v1.2.3


From df8185078a52c89cc5a75ff9be413a236da29a6e Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 25 Oct 2021 09:31:58 +0300
Subject: Implement `get_filename` for correlations

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Implement `get_filename` for the correlations, to be used to determine
  whether to do fast or normal correlations.

  This is a migration of the
  `web.webqtl.correlation.CorrelationPage.getFileName` function in GN1
---
 gn3/db/correlations.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 gn3/db/correlations.py

(limited to 'gn3/db')

diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py
new file mode 100644
index 0000000..fa8e7ca
--- /dev/null
+++ b/gn3/db/correlations.py
@@ -0,0 +1,26 @@
+"""
+This module will hold functions that are used in the (partial) correlations
+feature to access the database to retrieve data needed for computations.
+"""
+
+from typing import Any
+def get_filename(target_db_name: str, conn: Any) -> str:
+    """
+    Retrieve the name of the reference database file with which correlations are
+    computed.
+
+    This is a migration of the
+    `web.webqtl.correlation.CorrelationPage.getFileName` function in
+    GeneNetwork1.
+    """
+    with conn.cursor() as cursor:
+        cursor.execute(
+            "SELECT Id, FullName from ProbeSetFreeze WHERE Name-%s",
+            target_db_name)
+        result = cursor.fetchone()
+        if result:
+            return "ProbeSetFreezeId_{tid}_FullName_{fname}.txt".format(
+                tid=result[0],
+                fname=result[1].replace(' ', '_').replace('/', '_'))
+
+    return ""
-- 
cgit v1.2.3


From 0814eea6b57e45d4337424e63c164d204d03b64d Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 25 Oct 2021 12:38:24 +0300
Subject: Implement `fetch_literature_correlations` and depedencies

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Migrate:

  * `web.webqtl.correlation.CorrelationPage.getTempLiteratureTable`
  * `web.webqtl.correlation.CorrelationPage.fetchLitCorrelations`

  from GeneNetwork1.

  The first function creates and populates a temporary table with the
  literature correlations data.

  The second function uses the data in the newly created temporary table to
  link the trait with the correlation value.
---
 gn3/db/correlations.py | 113 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

(limited to 'gn3/db')

diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py
index fa8e7ca..67cfef9 100644
--- a/gn3/db/correlations.py
+++ b/gn3/db/correlations.py
@@ -4,6 +4,10 @@ feature to access the database to retrieve data needed for computations.
 """
 
 from typing import Any
+
+from gn3.random import random_string
+from gn3.db.species import translate_to_mouse_gene_id
+
 def get_filename(target_db_name: str, conn: Any) -> str:
     """
     Retrieve the name of the reference database file with which correlations are
@@ -24,3 +28,112 @@ def get_filename(target_db_name: str, conn: Any) -> str:
                 fname=result[1].replace(' ', '_').replace('/', '_'))
 
     return ""
+
+def build_temporary_literature_table(
+        species: str, gene_id: int, return_number: int, conn: Any) -> str:
+    """
+    Build and populate a temporary table to hold the literature correlation data
+    to be used in computations.
+
+    "This is a migration of the
+    `web.webqtl.correlation.CorrelationPage.getTempLiteratureTable` function in
+    GeneNetwork1.
+    """
+    def __translated_species_id(row, cursor):
+        if species == "mouse":
+            return row[1]
+        query = {
+            "rat": "SELECT rat FROM GeneIDXRef WHERE mouse=%s",
+            "human": "SELECT human FROM GeneIDXRef WHERE mouse=%d"}
+        if species in query.keys():
+            cursor.execute(query[species], row[1])
+            record = cursor.fetchone()
+            if record:
+                return record[0]
+            return None
+        return None
+
+    temp_table_name = f"TOPLITERATURE{random_string(8)}"
+    with conn.cursor as cursor:
+        mouse_geneid = translate_to_mouse_gene_id(species, gene_id, conn)
+        data_query = (
+            "SELECT GeneId1, GeneId2, value FROM LCorrRamin3 "
+            "WHERE GeneId1 = %(mouse_gene_id)s "
+            "UNION ALL "
+            "SELECT GeneId2, GeneId1, value FROM LCorrRamin3 "
+            "WHERE GeneId2 = %(mouse_gene_id)s "
+            "AND GeneId1 != %(mouse_gene_id)s")
+        cursor.execute(
+            (f"CREATE TEMPORARY TABLE {temp_table_name} ("
+             "GeneId1 int(12) unsigned, "
+             "GeneId2 int(12) unsigned PRIMARY KEY, "
+             "value double)"))
+        cursor.execute(data_query, mouse_gene_id=mouse_geneid)
+        literature_data = [
+            {"GeneId1": row[0], "GeneId2": row[1], "value": row[2]}
+            for row in cursor.fetchall()
+            if __translated_species_id(row, cursor)]
+
+        cursor.execute(
+            (f"INSERT INTO {temp_table_name} "
+             "VALUES (%(GeneId1)s, %(GeneId2)s, %(value)s)"),
+            literature_data[0:(2 * return_number)])
+
+    return temp_table_name
+
+def fetch_geno_literature_correlations(temp_table: str) -> str:
+    """
+    Helper function for `fetch_literature_correlations` below, to build query
+    for `Geno*` tables.
+    """
+    return (
+        f"SELECT Geno.Name, {temp_table}.value "
+        "FROM Geno, GenoXRef, GenoFreeze "
+        f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId "
+        "WHERE ProbeSet.GeneId IS NOT NULL "
+        f"AND {temp_table}.value IS NOT NULL "
+        "AND GenoXRef.GenoFreezeId = GenoFreeze.Id "
+        "AND GenoFreeze.Name = %(db_name)s "
+        "AND Geno.Id=GenoXRef.GenoId "
+        "ORDER BY Geno.Id")
+
+def fetch_probeset_literature_correlations(temp_table: str) -> str:
+    """
+    Helper function for `fetch_literature_correlations` below, to build query
+    for `ProbeSet*` tables.
+    """
+    return (
+        f"SELECT ProbeSet.Name, {temp_table}.value "
+        "FROM ProbeSet, ProbeSetXRef, ProbeSetFreeze "
+        "LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId "
+        "WHERE ProbeSet.GeneId IS NOT NULL "
+        "AND {temp_table}.value IS NOT NULL "
+        "AND ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id "
+        "AND ProbeSetFreeze.Name = %(db_name)s "
+        "AND ProbeSet.Id=ProbeSetXRef.ProbeSetId "
+        "ORDER BY ProbeSet.Id")
+
+def fetch_literature_correlations(
+        species: str, gene_id: int, dataset: dict, return_number: int,
+        conn: Any) -> dict:
+    """
+    Gather the literature correlation data and pair it with trait id string(s).
+
+    This is a migration of the
+    `web.webqtl.correlation.CorrelationPage.fetchLitCorrelations` function in
+    GeneNetwork1.
+    """
+    temp_table = build_temporary_literature_table(
+        species, gene_id, return_number, conn)
+    query_fns = {
+        "Geno": fetch_geno_literature_correlations,
+        # "Temp": fetch_temp_literature_correlations,
+        # "Publish": fetch_publish_literature_correlations,
+        "ProbeSet": fetch_probeset_literature_correlations}
+    with conn.cursor as cursor:
+        cursor.execute(
+            query_fns[dataset["dataset_type"]](temp_table),
+            db_name=dataset["dataset_name"])
+        results = cursor.fetchall()
+        cursor.execute("DROP TEMPORARY TABLE %s", temp_table)
+        return dict(results) # {trait_name: lit_corr for trait_name, lit_corr in results}
-- 
cgit v1.2.3


From c13afb3af166d2b01e4f9fd9b09bb231f0a63cb1 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 25 Oct 2021 19:19:54 +0300
Subject: Start implementation of `fetch_tissue_correlations` and dependencies

* compare_tissue_correlation_absolute_values: New function. Complete. Used for
  sorting of tissue correlation values
* fetch_symbol_value_pair_dict: New function. Complete. Maps gene symbols to
  tissue expression data
* fetch_gene_symbol_tissue_value_dict: New function. Complete. Wrapper for
  `gn3.db.correlations.fetch_symbol_value_pair_dict` function
* fetch_tissue_probeset_xref_info: New function. Complete. Retrieves the
  Probeset XRef information for tissues from the database.
* correlations_of_all_tissue_traits: Stub. Dependencies not completed yet.
* build_temporary_tissue_correlations_table: Stub. Dependencies not completed
  yet.
* fetch_tissue_correlations: New function. Incomplete. This function calls (a)
  stub(s) function(s) which is/are under development still.
---
 gn3/db/correlations.py | 183 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 181 insertions(+), 2 deletions(-)

(limited to 'gn3/db')

diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py
index 67cfef9..87ab082 100644
--- a/gn3/db/correlations.py
+++ b/gn3/db/correlations.py
@@ -3,9 +3,11 @@ This module will hold functions that are used in the (partial) correlations
 feature to access the database to retrieve data needed for computations.
 """
 
-from typing import Any
+from functools import reduce
+from typing import Any, Dict, Tuple
 
 from gn3.random import random_string
+from gn3.data_helpers import partition_all
 from gn3.db.species import translate_to_mouse_gene_id
 
 def get_filename(target_db_name: str, conn: Any) -> str:
@@ -136,4 +138,181 @@ def fetch_literature_correlations(
             db_name=dataset["dataset_name"])
         results = cursor.fetchall()
         cursor.execute("DROP TEMPORARY TABLE %s", temp_table)
-        return dict(results) # {trait_name: lit_corr for trait_name, lit_corr in results}
+        return dict(results)
+
+def compare_tissue_correlation_absolute_values(val1, val2):
+    """
+    Comparison function for use when sorting tissue correlation values.
+
+    This is a partial migration of the
+    `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in
+    GeneNetwork1."""
+    try:
+        if abs(val1) < abs(val2):
+            return 1
+        if abs(val1) == abs(val2):
+            return 0
+        return -1
+    except TypeError:
+        return 0
+
+def fetch_symbol_value_pair_dict(
+        symbol_list: Tuple[str, ...], data_id_dict: dict,
+        conn: Any) -> Dict[str, Tuple[float, ...]]:
+    """
+    Map each gene symbols to the corresponding tissue expression data.
+
+    This is a migration of the
+    `web.webqtl.correlation.correlationFunction.getSymbolValuePairDict` function
+    in GeneNetwork1.
+    """
+    data_ids = {
+        symbol: data_id_dict.get(symbol) for symbol in symbol_list
+        if data_id_dict.get(symbol) is not None
+    }
+    query = "SELECT Id, value FROM TissueProbeSetData WHERE Id IN %(data_ids)s"
+    with conn.cursor() as cursor:
+        cursor.execute(
+            query,
+            data_ids=tuple(data_ids.values()))
+        value_results = cursor.fetchall()
+        return {
+            key: tuple(row[1] for row in value_results if row[0] == key)
+            for key in data_ids.keys()
+        }
+
+    return {}
+
+def fetch_gene_symbol_tissue_value_dict(
+        symbol_list: Tuple[str, ...], data_id_dict: dict, conn: Any,
+        limit_num: int = 1000) -> dict:#getGeneSymbolTissueValueDict
+    """
+    Wrapper function for `gn3.db.correlations.fetch_symbol_value_pair_dict`.
+
+    This is a migrations of the
+    `web.webqtl.correlation.correlationFunction.getGeneSymbolTissueValueDict` in
+    GeneNetwork1.
+    """
+    count = len(symbol_list)
+    if count != 0 and count <= limit_num:
+        return fetch_symbol_value_pair_dict(symbol_list, data_id_dict, conn)
+
+    if count > limit_num:
+        return {
+            key: value for dct in [
+                fetch_symbol_value_pair_dict(sl, data_id_dict, conn)
+                for sl in partition_all(limit_num, symbol_list)]
+            for key, value in dct.items()
+        }
+
+    return {}
+
+def fetch_tissue_probeset_xref_info(
+        gene_name_list: Tuple[str, ...], probeset_freeze_id: int,
+        conn: Any) -> Tuple[tuple, dict, dict, dict, dict, dict, dict]:
+    """
+    Retrieve the ProbeSet XRef information for tissues.
+
+    This is a migration of the
+    `web.webqtl.correlation.correlationFunction.getTissueProbeSetXRefInfo`
+    function in GeneNetwork1."""
+    with conn.cursor() as cursor:
+        if len(gene_name_list) == 0:
+            query = (
+                "SELECT t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, "
+                "t.description, t.Probe_Target_Description "
+                "FROM "
+                "("
+                "  SELECT Symbol, max(Mean) AS maxmean "
+                "  FROM TissueProbeSetXRef "
+                "  WHERE TissueProbeSetFreezeId=%(probeset_freeze_id)s "
+                "  AND Symbol != '' "
+                "  AND Symbol IS NOT NULL "
+                "  GROUP BY Symbol"
+                ") AS x "
+                "INNER JOIN TissueProbeSetXRef AS t ON t.Symbol = x.Symbol "
+                "AND t.Mean = x.maxmean")
+            cursor.execute(query, probeset_freeze_id=probeset_freeze_id)
+        else:
+            query = (
+                "SELECT t.Symbol, t.GeneId, t.DataId, t.Chr, t.Mb, "
+                "t.description, t.Probe_Target_Description "
+                "FROM "
+                "("
+                "  SELECT Symbol, max(Mean) AS maxmean "
+                "  FROM TissueProbeSetXRef "
+                "  WHERE TissueProbeSetFreezeId=%(probeset_freeze_id)s "
+                "  AND Symbol in %(symbols)s "
+                "  GROUP BY Symbol"
+                ") AS x "
+                "INNER JOIN TissueProbeSetXRef AS t ON t.Symbol = x.Symbol "
+                "AND t.Mean = x.maxmean")
+            cursor.execute(
+                query, probeset_freeze_id=probeset_freeze_id,
+                symbols=tuple(gene_name_list))
+
+        results = cursor.fetchall()
+
+    return reduce(
+        lambda acc, item: (
+            acc[0] + (item[0],),
+            {**acc[1], item[0].lower(): item[1]},
+            {**acc[1], item[0].lower(): item[2]},
+            {**acc[1], item[0].lower(): item[3]},
+            {**acc[1], item[0].lower(): item[4]},
+            {**acc[1], item[0].lower(): item[5]},
+            {**acc[1], item[0].lower(): item[6]}),
+        results or tuple(),
+        (tuple(), {}, {}, {}, {}, {}, {}))
+
+def correlations_of_all_tissue_traits() -> Tuple[dict, dict]:
+    """
+    This is a migration of the
+    `web.webqtl.correlation.CorrelationPage.calculateCorrOfAllTissueTrait`
+    function in GeneNetwork1.
+    """
+    raise Exception("Unimplemented!!!")
+    return ({}, {})
+
+def build_temporary_tissue_correlations_table(
+        trait_symbol: str, probeset_freeze_id: int, method: str,
+        return_number: int, conn: Any) -> str:
+    """
+    Build a temporary table to hold the tissue correlations data.
+
+    This is a migration of the
+    `web.webqtl.correlation.CorrelationPage.getTempTissueCorrTable` function in
+    GeneNetwork1."""
+    raise Exception("Unimplemented!!!")
+    return ""
+
+def fetch_tissue_correlations(
+        dataset: dict, trait_symbol: str, probeset_freeze_id: int, method: str,
+        return_number: int, conn: Any) -> dict:
+    """
+    Pair tissue correlations data with a trait id string.
+
+    This is a migration of the
+    `web.webqtl.correlation.CorrelationPage.fetchTissueCorrelations` function in
+    GeneNetwork1.
+    """
+    temp_table = build_temporary_tissue_correlations_table(
+        trait_symbol, probeset_freeze_id, method, return_number, conn)
+    with conn.cursor() as cursor:
+        cursor.execute(
+            (
+                f"SELECT ProbeSet.Name, {temp_table}.Correlation, "
+                f"{temp_table}.PValue "
+                "FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) "
+                "LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol "
+                "WHERE ProbeSetFreeze.Name = %(db_name) "
+                "AND ProbeSetFreeze.Id=ProbeSetXRef.ProbeSetFreezeId "
+                "AND ProbeSet.Id = ProbeSetXRef.ProbeSetId "
+                "AND ProbeSet.Symbol IS NOT NULL "
+                "AND %s.Correlation IS NOT NULL"),
+            db_name=dataset["dataset_name"])
+        results = cursor.fetchall()
+        cursor.execute("DROP TEMPORARY TABLE %s", temp_table)
+        return {
+            trait_name: (tiss_corr, tiss_p_val)
+            for trait_name, tiss_corr, tiss_p_val in results}
-- 
cgit v1.2.3