From 5abe0158daed9428484925b95bb4a8b2251adea2 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Sat, 18 Dec 2021 08:09:04 +0300
Subject: Add dataset type to the results

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* The dataset type is relevant for the display of the data; therefore, this
  commit includes the dataset type in the results.
---
 gn3/computations/partial_correlations.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 13c411a..1c02533 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -717,7 +717,8 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
     return {
         "status": "success",
         "results": {
-        "primary_trait": primary_trait,
-        "control_traits": cntrl_traits,
-        "correlations": trait_list
+            "primary_trait": primary_trait,
+            "control_traits": cntrl_traits,
+            "correlations": trait_list,
+            "dataset_type": target_dataset["type"]
         }}
-- 
cgit 1.4.1


From 4304e7298af6769110a251b21fca2f105bee4c06 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Sat, 18 Dec 2021 11:36:07 +0300
Subject: Reduce the total amount of data to be output

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* There is a lot of data that is not necessary in the final result. This
  commit removes that data, retaining only data relevant for the display.
---
 gn3/computations/partial_correlations.py | 56 ++++++++++++++++++++++++++++++--
 1 file changed, 53 insertions(+), 3 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 1c02533..9fb17f7 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -526,6 +526,54 @@ def tissue_correlation_by_list(
         } for trait in trait_list)
     return trait_list
 
+def trait_for_output(trait):
+    """
+    Process a trait for output.
+
+    Keeps only the fields needed to display partial correlation results.
+    The `trait_type`, `trait_fullname`, `trait_name` and `db` fields are
+    required; every other field is optional. `delta` is computed only when
+    both `partial_corr` and `corr` are present. Key-value pairs whose value
+    is `None` are dropped, to avoid wasting network resources on them.
+    """
+    partial_corr, corr = trait.get("partial_corr"), trait.get("corr")
+    output = {
+        "trait_type": trait["trait_type"],
+        "dataset_name": trait["db"]["dataset_name"],
+        "dataset_type": trait["db"]["dataset_type"],
+        "group": trait["db"]["group"],
+        "trait_fullname": trait["trait_fullname"],
+        "trait_name": trait["trait_name"],
+        "symbol": trait.get("symbol"),
+        "description": trait.get("description"),
+        "pre_publication_description": trait.get(
+            "pre_publication_description"),
+        "post_publication_description": trait.get(
+            "post_publication_description"),
+        "original_description": trait.get("original_description"),
+        "authors": trait.get("authors"),
+        "year": trait.get("year"),
+        "probe_target_description": trait.get("probe_target_description"),
+        "chr": trait.get("chr"),
+        "mb": trait.get("mb"),
+        "geneid": trait.get("geneid"),
+        "homologeneid": trait.get("homologeneid"),
+        "noverlap": trait.get("noverlap"),
+        "partial_corr": partial_corr,
+        "partial_corr_p_value": trait.get("partial_corr_p_value"),
+        "corr": corr,
+        "corr_p_value": trait.get("corr_p_value"),
+        "rank_order": trait.get("rank_order"),
+        # Guard both operands: a missing `corr` would raise a `TypeError`.
+        "delta": (
+            None if partial_corr is None or corr is None
+            else partial_corr - corr),
+        "l_corr": trait.get("l_corr"),
+        "tissue_corr": trait.get("tissue_corr"),
+        "tissue_p_value": trait.get("tissue_p_value")
+    }
+    return {key: val for key, val in output.items() if val is not None}
+
 def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
         conn: Any, primary_trait_name: str,
         control_trait_names: Tuple[str, ...], method: str,
@@ -717,8 +765,10 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
     return {
         "status": "success",
         "results": {
-            "primary_trait": primary_trait,
-            "control_traits": cntrl_traits,
-            "correlations": trait_list,
+            "primary_trait": trait_for_output(primary_trait),
+            "control_traits": tuple(
+                trait_for_output(trait) for trait in cntrl_traits),
+            "correlations": tuple(
+                trait_for_output(trait) for trait in trait_list),
             "dataset_type": target_dataset["type"]
         }}
-- 
cgit 1.4.1


From 0508fc422c033cfff8bbea118f85282212d236e4 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 20 Dec 2021 09:38:17 +0300
Subject: Return the correlation method used

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Return the correlation method used
---
 gn3/computations/partial_correlations.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 9fb17f7..dbcbe29 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -770,5 +770,6 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
                 trait_for_output(trait) for trait in cntrl_traits),
             "correlations": tuple(
                 trait_for_output(trait) for trait in trait_list),
-            "dataset_type": target_dataset["type"]
+            "dataset_type": target_dataset["type"],
+            "method": "spearman" if "spearman" in method.lower() else "pearson"
         }}
-- 
cgit 1.4.1


From 2d3b6eae6953d5e4b00f21b5ffd683271d0f76bc Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Tue, 21 Dec 2021 13:04:51 +0300
Subject: Fix sorting

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Update the sorting algorithm for literature and tissue correlations so that
  it sorts the results by the correlation value first, and then by the
  p-value.
---
 gn3/computations/partial_correlations.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index dbcbe29..1e4a646 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -717,19 +717,30 @@ def partial_correlations_entry(# pylint: disable=[R0913, R0914, R0911]
 
 
     def __make_sorter__(method):
-        def __sort_6__(row):
-            return row[6]
-
-        def __sort_3__(row):
+        def __compare_lit_or_tiss_correlation_values_(row):
+            # Index  Content
+            # 0      trait name
+            # 1      N
+            # 2      partial correlation coefficient
+            # 3      p value of partial correlation
+            # 6      literature/tissue correlation value
+            return (row[6], row[3])
+
+        def __compare_partial_correlation_p_values__(row):
+            # Index  Content
+            # 0      trait name
+            # 1      partial correlation coefficient
+            # 2      N
+            # 3      p value of partial correlation
             return row[3]
 
         if "literature" in method.lower():
-            return __sort_6__
+            return __compare_lit_or_tiss_correlation_values_
 
         if "tissue" in method.lower():
-            return __sort_6__
+            return __compare_lit_or_tiss_correlation_values_
 
-        return __sort_3__
+        return __compare_partial_correlation_p_values__
 
     sorted_correlations = sorted(
         all_correlations, key=__make_sorter__(method))
-- 
cgit 1.4.1


From 672c1f2d43961feb59d9549557cb10c4f0b19dcf Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Dec 2021 09:13:32 +0300
Subject: Fix linting errors

---
 gn3/authentication.py            | 5 +++++
 gn3/computations/correlations.py | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'gn3/computations')

diff --git a/gn3/authentication.py b/gn3/authentication.py
index a6372c1..4aedacd 100644
--- a/gn3/authentication.py
+++ b/gn3/authentication.py
@@ -163,3 +163,8 @@ def create_group(conn: Redis, group_name: Optional[str],
         }
         conn.hset("groups", group_id, json.dumps(group))
         return group
+    # This might break stuff, but it fixes the linting error regarding
+    # inconsistent return types.
+    # @BonfaceKilz please review this and replace with appropriate return and
+    # remove these comments.
+    return None
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index d38946e..345b8d7 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -79,7 +79,7 @@ def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
             zip(*list(normalize_values(trait_vals, target_samples_vals))))
         num_overlap = len(normalized_traits_vals)
     except ValueError:
-        return
+        return None
 
     if num_overlap > 5:
 
-- 
cgit 1.4.1


From 784447b17d85a618005ac9acfc57f5b7ef8f5169 Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Wed, 22 Dec 2021 09:14:04 +0300
Subject: Fix typing errors

---
 gn3/computations/correlations.py         |  3 ++-
 gn3/computations/partial_correlations.py | 10 ++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'gn3/computations')

diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 345b8d7..1b4b3a4 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -7,6 +7,7 @@ from typing import List
 from typing import Tuple
 from typing import Optional
 from typing import Callable
+from typing import Generator
 
 import scipy.stats
 import pingouin as pg
@@ -106,7 +107,7 @@ package :not packaged in guix
 
 
 def filter_shared_sample_keys(this_samplelist,
-                              target_samplelist) -> Tuple[List, List]:
+                              target_samplelist) -> Generator:
     """Given primary and target sample-list for two base and target trait select
     filter the values using the shared keys
 
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 1e4a646..984c15a 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -217,7 +217,7 @@ def good_dataset_samples_indexes(
 def partial_correlations_fast(# pylint: disable=[R0913, R0914]
         samples, primary_vals, control_vals, database_filename,
         fetched_correlations, method: str, correlation_type: str) -> Tuple[
-            float, Tuple[float, ...]]:
+            int, Tuple[float, ...]]:
     """
     Computes partial correlation coefficients using data from a CSV file.
 
@@ -350,7 +350,9 @@ def compute_partial(
 def partial_correlations_normal(# pylint: disable=R0913
         primary_vals, control_vals, input_trait_gene_id, trait_database,
         data_start_pos: int, db_type: str, method: str) -> Tuple[
-            float, Tuple[float, ...]]:
+            int, Tuple[Union[
+                Tuple[str, int, float, float, float, float], None],
+                       ...]]:#Tuple[float, ...]
     """
     Computes the correlation coefficients.
 
@@ -485,7 +487,7 @@ def literature_correlation_by_list(
 
 def tissue_correlation_by_list(
         conn: Any, primary_trait_symbol: str, tissue_probeset_freeze_id: int,
-        method: str, trait_list: Tuple[dict]) -> Tuple[dict]:
+        method: str, trait_list: Tuple[dict]) -> Tuple[dict, ...]:
     """
     This is a migration of the
     `web.webqtl.correlation.CorrelationPage.getTissueCorrelationByList`
@@ -508,7 +510,7 @@ def tissue_correlation_by_list(
             primary_trait_value = prim_trait_symbol_value_dict[
                 primary_trait_symbol.lower()]
             gene_symbol_list = tuple(
-                trait for trait in trait_list if "symbol" in trait.keys())
+                trait["symbol"] for trait in trait_list if "symbol" in trait.keys())
             symbol_value_dict = fetch_gene_symbol_tissue_value_dict_for_trait(
                 gene_symbol_list, tissue_probeset_freeze_id, conn)
             return tuple(
-- 
cgit 1.4.1