about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-11-22 13:56:03 +0300
committer: Frederick Muriuki Muriithi, 2021-11-22 13:56:03 +0300
commit: 575da0baf4468d27782c73b19995b3adb934ba70 (patch)
tree: 2d9630a22488365939ed95dc71fe3c04b0d427c8
parent: 55d698b1fb07afe74bf1dd570f9f495aefea1086 (diff)
download: genenetwork3-575da0baf4468d27782c73b19995b3adb934ba70.tar.gz
Add tests for the query builders
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Add some tests for the query builders to ensure that the queries are built
  up correctly.
-rw-r--r-- gn3/db/correlations.py 78
-rw-r--r-- tests/unit/db/test_correlation.py 90
2 files changed, 132 insertions, 36 deletions
diff --git a/gn3/db/correlations.py b/gn3/db/correlations.py
index ff570b4..7daff87 100644
--- a/gn3/db/correlations.py
+++ b/gn3/db/correlations.py
@@ -402,6 +402,43 @@ def fetch_sample_ids(
             species_name=species)
         return cursor.fetchall()
 
+def build_query_sgo_lit_corr(
+        db_type: str, temp_table: str, sample_id_columns: str,
+        joins: Tuple[str, ...]) -> str:
+    """
+    Build query for `SGO Literature Correlation` data, when querying the given
+    `temp_table` temporary table.
+    """
+    return (
+        (f"SELECT {db_type}.Name, {temp_table}.value, " +
+         sample_id_columns +
+         f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " +
+         f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " +
+         " ".join(joins) +
+         f" WHERE ProbeSet.GeneId IS NOT NULL " +
+         f"AND {temp_table}.value IS NOT NULL " +
+         f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " +
+         f"AND {db_type}Freeze.Name = %(db_name)s " +
+         f"AND {db_type}.Id = {db_type}XRef.{db_type}Id " +
+         f"ORDER BY {db_type}.Id"),
+        2)
+
+def build_query_tissue_corr(db_type, temp_table, sample_id_columns, joins):
+    return (
+        (f"SELECT {db_type}.Name, {temp_table}.Correlation, " +
+         f"{temp_table}.PValue, " +
+         sample_id_columns +
+         f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " +
+         f"LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol " +
+         " ".join(joins) +
+         f" WHERE ProbeSet.Symbol IS NOT NULL " +
+         f"AND {temp_table}.Correlation IS NOT NULL " +
+         f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " +
+         f"AND {db_type}Freeze.Name = %(db_name)s " +
+         f"AND {db_type}.Id = {db_type}XRef.{db_type}Id "
+         f"ORDER BY {db_type}.Id"),
+        3)
+
 def fetch_all_database_data(
         conn: Any, species: str, gene_id: int, gene_symbol: str,
         samples: Tuple[str, ...], db_type: str, db_name: str, method: str,
@@ -411,37 +448,6 @@ def fetch_all_database_data(
     `web.webqtl.correlation.CorrelationPage.fetchAllDatabaseData` function in
     GeneNetwork1.
     """
-    def __build_query_sgo_lit__(temp_table, sample_id_columns, joins):
-        return (
-            (f"SELECT {db_type}.Name, {temp_table}.value " +
-             sample_id_columns +
-             f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " +
-             f"LEFT JOIN {temp_table} ON {temp_table}.GeneId2=ProbeSet.GeneId " +
-             " ".join(joins) +
-             f" WHERE ProbeSet.GeneId IS NOT NULL " +
-             f"AND {temp_table}.value IS NOT NULL " +
-             f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " +
-             f"AND {db_type}Freeze.Name = %(db_name)s " +
-             f"AND {db_type}.Id = {db_type}XRef.{db_type}Id " +
-             f"ORDER BY {db_type}.Id"),
-            2)
-
-    def __build_query_tissue_corr__(temp_table, sample_id_columns, joins):
-        return (
-            (f"SELECT {db_type}.Name, {temp_table}.Correlation, " +
-             f"{temp_table}.PValue, " +
-             sample_id_columns +
-             f" FROM ({db_type}, {db_type}XRef, {db_type}Freeze) " +
-             f"LEFT JOIN {temp_table} ON {temp_table}.Symbol=ProbeSet.Symbol " +
-             " ".join(joins) +
-             f" WHERE ProbeSet.Symbol IS NOT NULL " +
-             f"AND {temp_table}.Correlation IS NOT NULL " +
-             f"AND {db_type}XRef.{db_type}FreezeId = {db_type}Freeze.Id " +
-             f"AND {db_type}Freeze.Name = %(db_name)s " +
-             f"AND {db_type}.Id = {db_type}XRef.%sId "
-             f"ORDER BY {db_type}.Id"),
-            3)
-
     def __build_query__(sample_ids, temp_table):
         sample_id_columns = ", ".join(f"T{smpl}.value" for smpl in samples_ids)
         if db_type == "Publish":
@@ -460,17 +466,17 @@ def fetch_all_database_data(
                 1)
         if temp_table is not None:
             joins = tuple(
-                ("LEFT JOIN {db_type}Data AS T{item} "
-                 "ON T{item}.Id = {db_type}XRef.DataId "
-                 "AND T{item}.StrainId=%(T{item}_sample_id)s")
+                (f"LEFT JOIN {db_type}Data AS T{item} "
+                 f"ON T{item}.Id = {db_type}XRef.DataId "
+                 f"AND T{item}.StrainId=%(T{item}_sample_id)s")
                 for item in sample_ids)
             if method.lower() == "sgo literature correlation":
-                return __build_query_sgo_lit__(
+                return build_query_sgo_lit_corr(
                     sample_ids, temp_table, sample_id_columns)
             if method.lower() in (
                     "tissue correlation, pearson's r",
                     "tissue correlation, spearman's rho"):
-                return __build_query_tissue_corr__(
+                return build_query_tissue_corr(
                     sample_ids, temp_table, sample_id_columns)
         joins = tuple(
             (f"LEFT JOIN {db_type}Data AS T{item} "
diff --git a/tests/unit/db/test_correlation.py b/tests/unit/db/test_correlation.py
new file mode 100644
index 0000000..866d28d
--- /dev/null
+++ b/tests/unit/db/test_correlation.py
@@ -0,0 +1,90 @@
+"""
+Tests for the gn3.db.correlations module
+"""
+
+from unittest import TestCase
+
+from gn3.db.correlations import (
+    build_query_sgo_lit_corr,
+    build_query_tissue_corr)
+
+class TestCorrelation(TestCase):
+    """Test cases for correlation data fetching functions"""
+    maxDiff = None
+
+    def test_build_query_sgo_lit_corr(self):
+        self.assertEqual(
+            build_query_sgo_lit_corr(
+                "Probeset",
+                "temp_table_xy45i7wd",
+                "T1.value, T2.value, T3.value",
+                (("LEFT JOIN ProbesetData AS T1 "
+                  "ON T1.Id = ProbesetXRef.DataId "
+                  "AND T1.StrainId=%(T1_sample_id)s"),
+                 (
+                     "LEFT JOIN ProbesetData AS T2 "
+                     "ON T2.Id = ProbesetXRef.DataId "
+                     "AND T2.StrainId=%(T2_sample_id)s"),
+                 (
+                     "LEFT JOIN ProbesetData AS T3 "
+                     "ON T3.Id = ProbesetXRef.DataId "
+                     "AND T3.StrainId=%(T3_sample_id)s"))),
+            (("SELECT Probeset.Name, temp_table_xy45i7wd.value, "
+              "T1.value, T2.value, T3.value "
+              "FROM (Probeset, ProbesetXRef, ProbesetFreeze) "
+              "LEFT JOIN temp_table_xy45i7wd ON temp_table_xy45i7wd.GeneId2=ProbeSet.GeneId "
+              "LEFT JOIN ProbesetData AS T1 "
+              "ON T1.Id = ProbesetXRef.DataId "
+              "AND T1.StrainId=%(T1_sample_id)s "
+              "LEFT JOIN ProbesetData AS T2 "
+              "ON T2.Id = ProbesetXRef.DataId "
+              "AND T2.StrainId=%(T2_sample_id)s "
+              "LEFT JOIN ProbesetData AS T3 "
+              "ON T3.Id = ProbesetXRef.DataId "
+              "AND T3.StrainId=%(T3_sample_id)s "
+              "WHERE ProbeSet.GeneId IS NOT NULL "
+              "AND temp_table_xy45i7wd.value IS NOT NULL "
+              "AND ProbesetXRef.ProbesetFreezeId = ProbesetFreeze.Id "
+              "AND ProbesetFreeze.Name = %(db_name)s "
+              "AND Probeset.Id = ProbesetXRef.ProbesetId "
+              "ORDER BY Probeset.Id"),
+             2))
+
+    def test_build_query_tissue_corr(self):
+        self.assertEqual(
+            build_query_tissue_corr(
+                "Probeset",
+                "temp_table_xy45i7wd",
+                "T1.value, T2.value, T3.value",
+                (("LEFT JOIN ProbesetData AS T1 "
+                  "ON T1.Id = ProbesetXRef.DataId "
+                  "AND T1.StrainId=%(T1_sample_id)s"),
+                 (
+                     "LEFT JOIN ProbesetData AS T2 "
+                     "ON T2.Id = ProbesetXRef.DataId "
+                     "AND T2.StrainId=%(T2_sample_id)s"),
+                 (
+                     "LEFT JOIN ProbesetData AS T3 "
+                     "ON T3.Id = ProbesetXRef.DataId "
+                     "AND T3.StrainId=%(T3_sample_id)s"))),
+            (("SELECT Probeset.Name, temp_table_xy45i7wd.Correlation, "
+              "temp_table_xy45i7wd.PValue, "
+              "T1.value, T2.value, T3.value "
+              "FROM (Probeset, ProbesetXRef, ProbesetFreeze) "
+              "LEFT JOIN temp_table_xy45i7wd ON temp_table_xy45i7wd.Symbol=ProbeSet.Symbol "
+              "LEFT JOIN ProbesetData AS T1 "
+              "ON T1.Id = ProbesetXRef.DataId "
+              "AND T1.StrainId=%(T1_sample_id)s "
+              "LEFT JOIN ProbesetData AS T2 "
+              "ON T2.Id = ProbesetXRef.DataId "
+              "AND T2.StrainId=%(T2_sample_id)s "
+              "LEFT JOIN ProbesetData AS T3 "
+              "ON T3.Id = ProbesetXRef.DataId "
+              "AND T3.StrainId=%(T3_sample_id)s "
+              "WHERE ProbeSet.Symbol IS NOT NULL "
+              "AND temp_table_xy45i7wd.Correlation IS NOT NULL "
+              "AND ProbesetXRef.ProbesetFreezeId = ProbesetFreeze.Id "
+              "AND ProbesetFreeze.Name = %(db_name)s "
+              "AND Probeset.Id = ProbesetXRef.ProbesetId "
+              "ORDER BY Probeset.Id"),
+             3))