From a7fbce242f6683d66452ff02e541aa9b28908f39 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 14 Oct 2021 07:19:32 +0300 Subject: Allow CORS_ORIGINS to be configurable via the environment Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/non-clustered-heatmaps-and-flipping.gmi * gn3/app.py: setup CORS after all the configuration sources are loaded. * gn3/settings.py: Parse CORS_ORIGINS from the environment variables. Enable the CORS_ORIGINS configuration to be set in the environment variables to give the application some flexibility when launching. --- gn3/settings.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'gn3/settings.py') diff --git a/gn3/settings.py b/gn3/settings.py index 150d96d..56ddaba 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -35,10 +35,17 @@ GENOTYPE_FILES = os.environ.get( "GENOTYPE_FILES", "{}/genotype_files/genotype".format(os.environ.get("HOME"))) # CROSS-ORIGIN SETUP -CORS_ORIGINS = [ +def parse_env_cors(default): + origins_str = os.environ.get("CORS_ORIGINS", None) + if origins_str: + return [ + origin.strip() for origin in origins_str.split(",") if origin != ""] + return default + +CORS_ORIGINS = parse_env_cors([ "http://localhost:*", "http://127.0.0.1:*" -] +]) CORS_HEADERS = [ "Content-Type", -- cgit v1.2.3 From 546b37e77c11c5268aa9510b9756f2ed4d60241d Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Thu, 14 Oct 2021 07:31:41 +0300 Subject: Fix some linting issues --- gn3/heatmaps.py | 6 +++--- gn3/settings.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'gn3/settings.py') diff --git a/gn3/heatmaps.py b/gn3/heatmaps.py index ff65652..2dd9d07 100644 --- a/gn3/heatmaps.py +++ b/gn3/heatmaps.py @@ -374,8 +374,8 @@ def clustered_heatmap( fig = make_subplots( rows=num_plots if vertical else 1, cols=1 if vertical else num_plots, - shared_xaxes = "columns" if vertical else False, - shared_yaxes = False if vertical else "rows", + shared_xaxes="columns" if vertical else False, + shared_yaxes=False if vertical else "rows", vertical_spacing=0.010, horizontal_spacing=0.001, subplot_titles=["" if vertical else x_axis["label"]] + [ @@ -407,7 +407,7 @@ def clustered_heatmap( "mirror": False, "showticklabels": i == 0, "ticks": "outside" if i == 0 else "" - } + } for i in range(num_plots)} print("vertical?: {} ==> {}".format("T" if vertical else "F", axes_layouts)) diff --git a/gn3/settings.py b/gn3/settings.py index 56ddaba..d5f1d3c 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -36,6 +36,7 @@ GENOTYPE_FILES = os.environ.get( # CROSS-ORIGIN SETUP def parse_env_cors(default): + """Parse comma-separated configuration into list of strings.""" origins_str = os.environ.get("CORS_ORIGINS", None) if origins_str: return [ -- cgit v1.2.3 From 8f036415975d6e224e5e94277997329c0f1fa159 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 29 Oct 2021 09:49:28 +0300 Subject: Feature/biweight reimplementation (#47) * add biweight reimplementation with pingouin * delete biweight scripts and tests * add python-pingouin to guix file * delete biweight paths * mypy fix:pingouin mising imports * pep8 formatting && pylint fixes--- gn3/computations/biweight.py | 27 ------------------ gn3/computations/correlations.py | 11 ++++---- gn3/settings.py | 3 -- guix.scm | 1 + mypy.ini | 3 ++ scripts/calculate_biweight.R | 43 ----------------------------- tests/unit/computations/test_biweight.py | 21 -------------- 
tests/unit/computations/test_correlation.py | 11 -------- 8 files changed, 9 insertions(+), 111 deletions(-) delete mode 100644 gn3/computations/biweight.py delete mode 100644 scripts/calculate_biweight.R delete mode 100644 tests/unit/computations/test_biweight.py (limited to 'gn3/settings.py') diff --git a/gn3/computations/biweight.py b/gn3/computations/biweight.py deleted file mode 100644 index 7accd0c..0000000 --- a/gn3/computations/biweight.py +++ /dev/null @@ -1,27 +0,0 @@ -"""module contains script to call biweight midcorrelation in R""" -import subprocess - -from typing import List -from typing import Tuple - -from gn3.settings import BIWEIGHT_RSCRIPT - - -def calculate_biweight_corr(trait_vals: List, - target_vals: List, - path_to_script: str = BIWEIGHT_RSCRIPT, - command: str = "Rscript" - ) -> Tuple[float, float]: - """biweight function""" - - args_1 = ' '.join(str(trait_val) for trait_val in trait_vals) - args_2 = ' '.join(str(target_val) for target_val in target_vals) - cmd = [command, path_to_script] + [args_1] + [args_2] - - results = subprocess.check_output(cmd, universal_newlines=True) - try: - (corr_coeff, p_val) = tuple( - [float(y.strip()) for y in results.split()]) - return (corr_coeff, p_val) - except Exception as error: - raise error diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index bb13ff1..c930df0 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -8,7 +8,7 @@ from typing import Optional from typing import Callable import scipy.stats -from gn3.computations.biweight import calculate_biweight_corr +import pingouin as pg def map_shared_keys_to_values(target_sample_keys: List, @@ -102,11 +102,10 @@ package :not packaged in guix """ - try: - results = calculate_biweight_corr(x_val, y_val) - return results - except Exception as error: - raise error + results = pg.corr(x_val, y_val, method="bicor") + corr_coeff = results["r"].values[0] + p_val = results["p-val"].values[0] + return (corr_coeff, p_val) def filter_shared_sample_keys(this_samplelist, diff --git a/gn3/settings.py b/gn3/settings.py index d5f1d3c..e85eeff 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -22,9 +22,6 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False GN2_BASE_URL = "http://www.genenetwork.org/" -# biweight script -BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" - # wgcna script WGCNA_RSCRIPT = "wgcna_analysis.R" # qtlreaper command diff --git a/guix.scm b/guix.scm index d8b1596..81e8389 100644 --- a/guix.scm +++ b/guix.scm @@ -110,6 +110,7 @@ ("r-rjson" ,r-rjson) ("python-plotly" ,python-plotly) ("python-pandas" ,python-pandas) + ("python-pingouin" ,python-pingouin) ("rust-qtlreaper" ,rust-qtlreaper) ("python-flask-cors" ,python-flask-cors))) (build-system python-build-system) diff --git a/mypy.ini b/mypy.ini index 5d66812..a507703 100644 --- a/mypy.ini +++ b/mypy.ini @@ -11,3 +11,6 @@ ignore_missing_imports = True [mypy-ipfshttpclient.*] ignore_missing_imports = True + +[mypy-pingouin.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/scripts/calculate_biweight.R b/scripts/calculate_biweight.R deleted file mode 100644 index 8d8366e..0000000 --- a/scripts/calculate_biweight.R +++ /dev/null @@ -1,43 +0,0 @@ - -library(testthat) -library(WGCNA) - -arg_values <- commandArgs(trailingOnly = TRUE) -ParseArgs <- function(args){ - - trait_vals <- as.numeric(unlist(strsplit(args[1], split=" "))) - target_vals <- as.numeric(unlist(strsplit(args[2], split=" "))) - - return(list(trait_vals= 
c(trait_vals),target_vals = c(target_vals))) - -} -BiweightMidCorrelation <- function(trait_val,target_val){ - - results <-bicorAndPvalue(as.numeric(unlist(trait_val)),as.numeric(unlist(target_val))) - return ((c(c(results$bicor)[1],c(results$p)[1]))) - -} - - - -test_that("biweight results"),{ - vec_1 <- c(1,2,3,4) - vec_2 <- c(1,2,3,4) - - results <- BiweightMidCorrelation(vec_1,vec_2) - expect_equal(c(1.0,0.0),results) -} - - -test_that("parsing args "),{ - my_args <- c("1 2 3 4","5 6 7 8") - results <- ParseArgs(my_args) - - expect_equal(results[1],c(1,2,3,4)) - expect_equal(results[2],c(5,6,7,8)) -} - -parsed_values <- ParseArgs(arg_values) - - -cat(BiweightMidCorrelation(parsed_values[1],parsed_values[2])) \ No newline at end of file diff --git a/tests/unit/computations/test_biweight.py b/tests/unit/computations/test_biweight.py deleted file mode 100644 index ad404f1..0000000 --- a/tests/unit/computations/test_biweight.py +++ /dev/null @@ -1,21 +0,0 @@ -"""test for biweight script""" -from unittest import TestCase -from unittest import mock - -from gn3.computations.biweight import calculate_biweight_corr - - -class TestBiweight(TestCase): - """test class for biweight""" - - @mock.patch("gn3.computations.biweight.subprocess.check_output") - def test_calculate_biweight_corr(self, mock_check_output): - """test for calculate_biweight_corr func""" - mock_check_output.return_value = "0.1 0.5" - results = calculate_biweight_corr(command="Rscript", - path_to_script="./r_script.R", - trait_vals=[ - 1.2, 1.1, 1.9], - target_vals=[1.9, 0.4, 1.1]) - - self.assertEqual(results, (0.1, 0.5)) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index fc52ec1..96d9c6d 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -5,7 +5,6 @@ from unittest import mock from collections import namedtuple from gn3.computations.correlations import normalize_values -from gn3.computations.correlations import do_bicor from gn3.computations.correlations import compute_sample_r_correlation from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import filter_shared_sample_keys @@ -98,16 +97,6 @@ class TestCorrelation(TestCase): self.assertEqual(results, expected_results) - @mock.patch("gn3.computations.correlations.calculate_biweight_corr") - def test_bicor(self, mock_biweight): - """Test for doing biweight mid correlation """ - mock_biweight.return_value = (1.0, 0.0) - - results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6]) - - self.assertEqual(results, (1.0, 0.0) - ) - @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value") @mock.patch("gn3.computations.correlations.normalize_values") def test_compute_sample_r_correlation(self, norm_vals, compute_corr): -- cgit v1.2.3 From 457f2a8473a1d44dfcb66d0c28aa1c7a3a256c85 Mon Sep 17 00:00:00 2001 From: Frederick Muriuki Muriithi Date: Mon, 1 Nov 2021 10:49:35 +0300 Subject: Implement `compute_partial_correlations_fast` Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi * Implement `compute_partial_correlations_fast` that is a partial migration of `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` in GN1. This function will probably be reworked once the dependencies are fully migrated. It also needs tests to be added. 
--- gn3/computations/partial_correlations.py | 49 ++++++++++++++++++++++++++++++++ gn3/settings.py | 3 ++ 2 files changed, 52 insertions(+) (limited to 'gn3/settings.py') diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index ba4de9e..1a6868a 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -9,6 +9,9 @@ from functools import reduce from typing import Any, Tuple, Sequence from scipy.stats import pearsonr, spearmanr +from gn3.settings import TEXTDIR +from gn3.data_helpers import parse_csv_line + def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): """ Fetches data for the control traits. @@ -192,3 +195,49 @@ def good_dataset_samples_indexes( return tuple(sorted( samples_from_file.index(good) for good in set(samples).intersection(set(samples_from_file)))) + +def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914] + samples, primary_vals, control_vals, database_filename, + fetched_correlations, method: str, correlation_type: str) -> Tuple[ + float, Tuple[float, ...]]: + """ + This is a partial migration of the + `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` + function in GeneNetwork1. + """ + assert method in ("spearman", "pearson") + with open(f"{TEXTDIR}/{database_filename}", "r") as dataset_file: + dataset = tuple(dataset_file.readlines()) + + good_dataset_samples = good_dataset_samples_indexes( + samples, parse_csv_line(dataset[0])[1:]) + + def __process_trait_names_and_values__(acc, line): + trait_line = parse_csv_line(line) + trait_name = trait_line[0] + trait_data = trait_line[1:] + if trait_name in fetched_correlations.keys(): + return ( + acc[0] + (trait_name,), + acc[1] + tuple( + trait_data[i] if i in good_dataset_samples else None + for i in range(len(trait_data)))) + return acc + + processed_trait_names_values: tuple = reduce( + __process_trait_names_and_values__, dataset[1:], (tuple(), tuple())) + all_target_trait_names: Tuple[str, ...] = processed_trait_names_values[0] + all_target_trait_values: Tuple[float, ...] = processed_trait_names_values[1] + + all_correlations = determine_partials( + primary_vals, control_vals, all_target_trait_names, + all_target_trait_values, method) + ## Line 772 to 779 in GN1 are the cause of the weird complexity in the + ## return below. Once the surrounding code is successfully migrated and + ## reworked, this complexity might go away, by getting rid of the + ## `correlation_type` parameter + return len(all_correlations), tuple( + corr + ( + (fetched_correlations[corr[0]],) if correlation_type == "literature" + else fetched_correlations[corr[0]][0:2]) + for idx, corr in enumerate(all_correlations)) diff --git a/gn3/settings.py b/gn3/settings.py index e85eeff..57c63df 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -50,3 +50,6 @@ CORS_HEADERS = [ "Authorization", "Access-Control-Allow-Credentials" ] + +GNSHARE = os.environ.get("GNSHARE", "/gnshare/gn/") +TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix" -- cgit v1.2.3
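Usage sketch (not part of the patch series above): a minimal illustration of how the changes in these commits can be exercised. The origin URLs, trait values, sample names and the "ProbeSetID" column header below are made up for demonstration; only parse_env_cors/CORS_ORIGINS, GNSHARE/TEXTDIR and the pingouin.corr(..., method="bicor") call come from the diffs, and importing gn3.settings assumes the rest of that module loads in your environment.

import os
import pingouin as pg

# CORS_ORIGINS is parsed at import time in gn3/settings.py, so set it before
# importing the settings module (and hence before the Flask app is created).
# GNSHARE can be overridden the same way to relocate TEXTDIR.
os.environ["CORS_ORIGINS"] = "http://localhost:*, https://genenetwork.org"

from gn3.settings import CORS_ORIGINS  # import deliberately after env setup
print(CORS_ORIGINS)   # ['http://localhost:*', 'https://genenetwork.org']

# The R/subprocess biweight implementation was replaced with pingouin's
# biweight midcorrelation ("bicor"); do_bicor now reduces to a call like this,
# reading the coefficient and p-value from the returned DataFrame.
results = pg.corr(
    [1.2, 1.1, 1.9, 0.3, 2.4, 0.9],
    [1.9, 0.4, 1.1, 0.8, 2.1, 1.0],
    method="bicor")
print(results["r"].values[0], results["p-val"].values[0])

# compute_partial_correlations_fast reads a text matrix from TEXTDIR whose
# first CSV line lists the sample names and whose remaining lines start with
# a trait name followed by that trait's values; made-up example contents:
#
#   ProbeSetID,BXD1,BXD2,BXD5
#   1443823_s_at,7.234,6.987,7.105
#   1443824_at,9.812,9.644,9.903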