From a7fbce242f6683d66452ff02e541aa9b28908f39 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 14 Oct 2021 07:19:32 +0300 Subject: Allow CORS_ORIGINS to be configurable via the environment Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/non-clustered-heatmaps-and-flipping.gmi * gn3/app.py: setup CORS after all the configuration sources are loaded. * gn3/settings.py: Parse CORS_ORIGINS from the environment variables. Enable the CORS_ORIGINS configuration to be set in the environment variables to give the application some flexibility when launching. --- gn3/settings.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'gn3/settings.py') diff --git a/gn3/settings.py b/gn3/settings.py index 150d96d..56ddaba 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -35,10 +35,17 @@ GENOTYPE_FILES = os.environ.get( "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME"))) # CROSS-ORIGIN SETUP -CORS_ORIGINS = [ +def parse_env_cors(default): + origins_str = os.environ.get("CORS_ORIGINS", None) + if origins_str: + return [ + origin.strip() for origin in origins_str.split(",") if origin != ""] + return default + +CORS_ORIGINS = parse_env_cors([ "http://localhost:*", "http://127.0.0.1:*" -] +]) CORS_HEADERS = [ "Content-Type", -- cgit v1.2.3 From 546b37e77c11c5268aa9510b9756f2ed4d60241d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 14 Oct 2021 07:31:41 +0300 Subject: Fix some linting issues --- gn3/heatmaps.py | 6 +++--- gn3/settings.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'gn3/settings.py') diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py index ff65652..2dd9d07 100644 --- a/gn3/heatmaps.py +++ b/gn3/heatmaps.py @@ -374,8 +374,8 @@ def clustered_heatmap( fig = make_subplots( rows=num_plots if vertical else 1, cols=1 if vertical else num_plots, - shared_xaxes = "columns" if vertical else False, - shared_yaxes = False if vertical else "rows", + shared_xaxes="columns" if vertical else False, + shared_yaxes=False if vertical else "rows", vertical_spacing=0.010, horizontal_spacing=0.001, subplot_titles=["" if vertical else x_axis["label"]] + [ @@ -407,7 +407,7 @@ def clustered_heatmap( "mirror": False, "showticklabels": i == 0, "ticks": "outside" if i == 0 else "" - } + } for i in range(num_plots)} print("vertical?: {} ==> {}".format("T" if vertical else "F", axes_layouts)) diff --git a/gn3/settings.py b/gn3/settings.py index 56ddaba..d5f1d3c 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -36,6 +36,7 @@ GENOTYPE_FILES = os.environ.get( # CROSS-ORIGIN SETUP def parse_env_cors(default): + """Parse comma-separated configuration into list of strings.""" origins_str = os.environ.get("CORS_ORIGINS", None) if origins_str: return [ -- cgit v1.2.3 From 8f036415975d6e224e5e94277997329c0f1fa159 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 29 Oct 2021 09:49:28 +0300 Subject: Feature/biweight reimplementation (#47) * add biweight reimplementation with pingouin * delete biweight scripts and tests * add python-pingouin to guix file * delete biweight paths * mypy fix:pingouin mising imports * pep8 formatting && pylint fixes--- gn3/computations/biweight.py | 27 ------------------ gn3/computations/correlations.py | 11 ++++---- gn3/settings.py | 3 -- guix.scm | 1 + mypy.ini | 3 ++ scripts/calculate_biweight.R | 43 ----------------------------- tests/unit/computations/test_biweight.py | 21 -------------- 
tests/unit/computations/test_correlation.py | 11 -------- 8 files changed, 9 insertions(+), 111 deletions(-) delete mode 100644 gn3/computations/biweight.py delete mode 100644 scripts/calculate_biweight.R delete mode 100644 tests/unit/computations/test_biweight.py (limited to 'gn3/settings.py') diff --git a/gn3/computations/biweight.py b/gn3/computations/biweight.py deleted file mode 100644 index 7accd0c..0000000 --- a/gn3/computations/biweight.py +++ /dev/null @@ -1,27 +0,0 @@ -"""module contains script to call biweight midcorrelation in R""" -import subprocess - -from typing import List -from typing import Tuple - -from gn3.settings import BIWEIGHT_RSCRIPT - - -def calculate_biweight_corr(trait_vals: List, - target_vals: List, - path_to_script: str = BIWEIGHT_RSCRIPT, - command: str = "Rscript" - ) -> Tuple[float, float]: - """biweight function""" - - args_1 = ' '.join(str(trait_val) for trait_val in trait_vals) - args_2 = ' '.join(str(target_val) for target_val in target_vals) - cmd = [command, path_to_script] + [args_1] + [args_2] - - results = subprocess.check_output(cmd, universal_newlines=True) - try: - (corr_coeff, p_val) = tuple( - [float(y.strip()) for y in results.split()]) - return (corr_coeff, p_val) - except Exception as error: - raise error diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index bb13ff1..c930df0 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -8,7 +8,7 @@ from typing import Optional from typing import Callable import scipy.stats -from gn3.computations.biweight import calculate_biweight_corr +import pingouin as pg def map_shared_keys_to_values(target_sample_keys: List, @@ -102,11 +102,10 @@ package :not packaged in guix """ - try: - results = calculate_biweight_corr(x_val, y_val) - return results - except Exception as error: - raise error + results = pg.corr(x_val, y_val, method="bicor") + corr_coeff = results["r"].values[0] + p_val = results["p-val"].values[0] + return (corr_coeff, p_val) def filter_shared_sample_keys(this_samplelist, diff --git a/gn3/settings.py b/gn3/settings.py index d5f1d3c..e85eeff 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -22,9 +22,6 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False GN2_BASE_URL = "http://www.genenetwork.org/" -# biweight script -BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" - # wgcna script WGCNA_RSCRIPT = "wgcna_analysis.R" # qtlreaper command diff --git a/guix.scm b/guix.scm index d8b1596..81e8389 100644 --- a/guix.scm +++ b/guix.scm @@ -110,6 +110,7 @@ ("r-rjson" ,r-rjson) ("python-plotly" ,python-plotly) ("python-pandas" ,python-pandas) + ("python-pingouin" ,python-pingouin) ("rust-qtlreaper" ,rust-qtlreaper) ("python-flask-cors" ,python-flask-cors))) (build-system python-build-system) diff --git a/mypy.ini b/mypy.ini index 5d66812..a507703 100644 --- a/mypy.ini +++ b/mypy.ini @@ -11,3 +11,6 @@ ignore_missing_imports = True [mypy-ipfshttpclient.*] ignore_missing_imports = True + +[mypy-pingouin.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/scripts/calculate_biweight.R b/scripts/calculate_biweight.R deleted file mode 100644 index 8d8366e..0000000 --- a/scripts/calculate_biweight.R +++ /dev/null @@ -1,43 +0,0 @@ - -library(testthat) -library(WGCNA) - -arg_values <- commandArgs(trailingOnly = TRUE) -ParseArgs <- function(args){ - - trait_vals <- as.numeric(unlist(strsplit(args[1], split=" "))) - target_vals <- as.numeric(unlist(strsplit(args[2], split=" "))) - - return(list(trait_vals= 
c(trait_vals),target_vals = c(target_vals))) - -} -BiweightMidCorrelation <- function(trait_val,target_val){ - - results <-bicorAndPvalue(as.numeric(unlist(trait_val)),as.numeric(unlist(target_val))) - return ((c(c(results$bicor)[1],c(results$p)[1]))) - -} - - - -test_that("biweight results"),{ - vec_1 <- c(1,2,3,4) - vec_2 <- c(1,2,3,4) - - results <- BiweightMidCorrelation(vec_1,vec_2) - expect_equal(c(1.0,0.0),results) -} - - -test_that("parsing args "),{ - my_args <- c("1 2 3 4","5 6 7 8") - results <- ParseArgs(my_args) - - expect_equal(results[1],c(1,2,3,4)) - expect_equal(results[2],c(5,6,7,8)) -} - -parsed_values <- ParseArgs(arg_values) - - -cat(BiweightMidCorrelation(parsed_values[1],parsed_values[2])) \ No newline at end of file diff --git a/tests/unit/computations/test_biweight.py b/tests/unit/computations/test_biweight.py deleted file mode 100644 index ad404f1..0000000 --- a/tests/unit/computations/test_biweight.py +++ /dev/null @@ -1,21 +0,0 @@ -"""test for biweight script""" -from unittest import TestCase -from unittest import mock - -from gn3.computations.biweight import calculate_biweight_corr - - -class TestBiweight(TestCase): - """test class for biweight""" - - @mock.patch("gn3.computations.biweight.subprocess.check_output") - def test_calculate_biweight_corr(self, mock_check_output): - """test for calculate_biweight_corr func""" - mock_check_output.return_value = "0.1 0.5" - results = calculate_biweight_corr(command="Rscript", - path_to_script="./r_script.R", - trait_vals=[ - 1.2, 1.1, 1.9], - target_vals=[1.9, 0.4, 1.1]) - - self.assertEqual(results, (0.1, 0.5)) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index fc52ec1..96d9c6d 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -5,7 +5,6 @@ from unittest import mock from collections import namedtuple from gn3.computations.correlations import normalize_values -from gn3.computations.correlations import do_bicor from gn3.computations.correlations import compute_sample_r_correlation from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import filter_shared_sample_keys @@ -98,16 +97,6 @@ class TestCorrelation(TestCase): self.assertEqual(results, expected_results) - @mock.patch("gn3.computations.correlations.calculate_biweight_corr") - def test_bicor(self, mock_biweight): - """Test for doing biweight mid correlation """ - mock_biweight.return_value = (1.0, 0.0) - - results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6]) - - self.assertEqual(results, (1.0, 0.0) - ) - @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value") @mock.patch("gn3.computations.correlations.normalize_values") def test_compute_sample_r_correlation(self, norm_vals, compute_corr): -- cgit v1.2.3 From 457f2a8473a1d44dfcb66d0c28aa1c7a3a256c85 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 10:49:35 +0300 Subject: Implement `compute_partial_correlations_fast` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Implement `compute_partial_correlations_fast` that is a partial migration of `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` in GN1. This function will probably be reworked once the dependencies are fully migrated. It also needs tests to be added. 
--- gn3/computations/partial_correlations.py | 49 ++++++++++++++++++++++++++++++++ gn3/settings.py | 3 ++ 2 files changed, 52 insertions(+) (limited to 'gn3/settings.py') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index ba4de9e..1a6868a 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -9,6 +9,9 @@ from functools import reduce from typing import Any, Tuple, Sequence from scipy.stats import pearsonr, spearmanr +from gn3.settings import TEXTDIR +from gn3.data_helpers import parse_csv_line + def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): """ Fetches data for the control traits. @@ -192,3 +195,49 @@ def good_dataset_samples_indexes( return tuple(sorted( samples_from_file.index(good) for good in set(samples).intersection(set(samples_from_file)))) + +def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914] + samples, primary_vals, control_vals, database_filename, + fetched_correlations, method: str, correlation_type: str) -> Tuple[ + float, Tuple[float, ...]]: + """ + This is a partial migration of the + `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` + function in GeneNetwork1. + """ + assert method in ("spearman", "pearson") + with open(f"{TEXTDIR}/{database_filename}", "r") as dataset_file: + dataset = tuple(dataset_file.readlines()) + + good_dataset_samples = good_dataset_samples_indexes( + samples, parse_csv_line(dataset[0])[1:]) + + def __process_trait_names_and_values__(acc, line): + trait_line = parse_csv_line(line) + trait_name = trait_line[0] + trait_data = trait_line[1:] + if trait_name in fetched_correlations.keys(): + return ( + acc[0] + (trait_name,), + acc[1] + tuple( + trait_data[i] if i in good_dataset_samples else None + for i in range(len(trait_data)))) + return acc + + processed_trait_names_values: tuple = reduce( + __process_trait_names_and_values__, dataset[1:], (tuple(), tuple())) + all_target_trait_names: Tuple[str, ...] = processed_trait_names_values[0] + all_target_trait_values: Tuple[float, ...] = processed_trait_names_values[1] + + all_correlations = determine_partials( + primary_vals, control_vals, all_target_trait_names, + all_target_trait_values, method) + ## Line 772 to 779 in GN1 are the cause of the weird complexity in the + ## return below. Once the surrounding code is successfully migrated and + ## reworked, this complexity might go away, by getting rid of the + ## `correlation_type` parameter + return len(all_correlations), tuple( + corr + ( + (fetched_correlations[corr[0]],) if correlation_type == "literature" + else fetched_correlations[corr[0]][0:2]) + for idx, corr in enumerate(all_correlations)) diff --git a/gn3/settings.py b/gn3/settings.py index e85eeff..57c63df 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -50,3 +50,6 @@ CORS_HEADERS = [ "Authorization", "Access-Control-Allow-Credentials" ] + +GNSHARE = os.environ.get("GNSHARE", "/gnshare/gn/") +TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix" -- cgit v1.2.3
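Usage sketch (not part of the patch series above): a minimal illustration of how the changes in these commits can be exercised. The origin URLs, trait values, sample names and the "ProbeSetID" column header below are made up for demonstration; only parse_env_cors/CORS_ORIGINS, GNSHARE/TEXTDIR and the pingouin.corr(..., method="bicor") call come from the diffs, and importing gn3.settings assumes the rest of that module loads in your environment.

import os
import pingouin as pg

# CORS_ORIGINS is parsed at import time in gn3/settings.py, so set it before
# importing the settings module (and hence before the Flask app is created).
# GNSHARE can be overridden the same way to relocate TEXTDIR.
os.environ["CORS_ORIGINS"] = "http://localhost:*, https://genenetwork.org"

from gn3.settings import CORS_ORIGINS  # import deliberately after env setup
print(CORS_ORIGINS)   # ['http://localhost:*', 'https://genenetwork.org']

# The R/subprocess biweight implementation was replaced with pingouin's
# biweight midcorrelation ("bicor"); do_bicor now reduces to a call like this,
# reading the coefficient and p-value from the returned DataFrame.
results = pg.corr(
    [1.2, 1.1, 1.9, 0.3, 2.4, 0.9],
    [1.9, 0.4, 1.1, 0.8, 2.1, 1.0],
    method="bicor")
print(results["r"].values[0], results["p-val"].values[0])

# compute_partial_correlations_fast reads a text matrix from TEXTDIR whose
# first CSV line lists the sample names and whose remaining lines start with
# a trait name followed by that trait's values; made-up example contents:
#
#   ProbeSetID,BXD1,BXD2,BXD5
#   1443823_s_at,7.234,6.987,7.105
#   1443824_at,9.812,9.644,9.903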