From 8f036415975d6e224e5e94277997329c0f1fa159 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 29 Oct 2021 09:49:28 +0300 Subject: Feature/biweight reimplementation (#47) * add biweight reimplementation with pingouin * delete biweight scripts and tests * add python-pingouin to guix file * delete biweight paths * mypy fix:pingouin mising imports * pep8 formatting && pylint fixes--- gn3/computations/biweight.py | 27 ------------------ gn3/computations/correlations.py | 11 ++++---- gn3/settings.py | 3 -- guix.scm | 1 + mypy.ini | 3 ++ scripts/calculate_biweight.R | 43 ----------------------------- tests/unit/computations/test_biweight.py | 21 -------------- tests/unit/computations/test_correlation.py | 11 -------- 8 files changed, 9 insertions(+), 111 deletions(-) delete mode 100644 gn3/computations/biweight.py delete mode 100644 scripts/calculate_biweight.R delete mode 100644 tests/unit/computations/test_biweight.py diff --git a/gn3/computations/biweight.py b/gn3/computations/biweight.py deleted file mode 100644 index 7accd0c..0000000 --- a/gn3/computations/biweight.py +++ /dev/null @@ -1,27 +0,0 @@ -"""module contains script to call biweight midcorrelation in R""" -import subprocess - -from typing import List -from typing import Tuple - -from gn3.settings import BIWEIGHT_RSCRIPT - - -def calculate_biweight_corr(trait_vals: List, - target_vals: List, - path_to_script: str = BIWEIGHT_RSCRIPT, - command: str = "Rscript" - ) -> Tuple[float, float]: - """biweight function""" - - args_1 = ' '.join(str(trait_val) for trait_val in trait_vals) - args_2 = ' '.join(str(target_val) for target_val in target_vals) - cmd = [command, path_to_script] + [args_1] + [args_2] - - results = subprocess.check_output(cmd, universal_newlines=True) - try: - (corr_coeff, p_val) = tuple( - [float(y.strip()) for y in results.split()]) - return (corr_coeff, p_val) - except Exception as error: - raise error diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index bb13ff1..c930df0 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -8,7 +8,7 @@ from typing import Optional from typing import Callable import scipy.stats -from gn3.computations.biweight import calculate_biweight_corr +import pingouin as pg def map_shared_keys_to_values(target_sample_keys: List, @@ -102,11 +102,10 @@ package :not packaged in guix """ - try: - results = calculate_biweight_corr(x_val, y_val) - return results - except Exception as error: - raise error + results = pg.corr(x_val, y_val, method="bicor") + corr_coeff = results["r"].values[0] + p_val = results["p-val"].values[0] + return (corr_coeff, p_val) def filter_shared_sample_keys(this_samplelist, diff --git a/gn3/settings.py b/gn3/settings.py index d5f1d3c..e85eeff 100644 --- a/gn3/settings.py +++ b/gn3/settings.py @@ -22,9 +22,6 @@ SQLALCHEMY_TRACK_MODIFICATIONS = False GN2_BASE_URL = "http://www.genenetwork.org/" -# biweight script -BIWEIGHT_RSCRIPT = "~/genenetwork3/scripts/calculate_biweight.R" - # wgcna script WGCNA_RSCRIPT = "wgcna_analysis.R" # qtlreaper command diff --git a/guix.scm b/guix.scm index d8b1596..81e8389 100644 --- a/guix.scm +++ b/guix.scm @@ -110,6 +110,7 @@ ("r-rjson" ,r-rjson) ("python-plotly" ,python-plotly) ("python-pandas" ,python-pandas) + ("python-pingouin" ,python-pingouin) ("rust-qtlreaper" ,rust-qtlreaper) ("python-flask-cors" ,python-flask-cors))) (build-system python-build-system) diff --git a/mypy.ini b/mypy.ini index 5d66812..a507703 100644 --- a/mypy.ini +++ b/mypy.ini @@ -11,3 +11,6 @@ ignore_missing_imports = True [mypy-ipfshttpclient.*] ignore_missing_imports = True + +[mypy-pingouin.*] +ignore_missing_imports = True \ No newline at end of file diff --git a/scripts/calculate_biweight.R b/scripts/calculate_biweight.R deleted file mode 100644 index 8d8366e..0000000 --- a/scripts/calculate_biweight.R +++ /dev/null @@ -1,43 +0,0 @@ - -library(testthat) -library(WGCNA) - -arg_values <- commandArgs(trailingOnly = TRUE) -ParseArgs <- function(args){ - - trait_vals <- as.numeric(unlist(strsplit(args[1], split=" "))) - target_vals <- as.numeric(unlist(strsplit(args[2], split=" "))) - - return(list(trait_vals= c(trait_vals),target_vals = c(target_vals))) - -} -BiweightMidCorrelation <- function(trait_val,target_val){ - - results <-bicorAndPvalue(as.numeric(unlist(trait_val)),as.numeric(unlist(target_val))) - return ((c(c(results$bicor)[1],c(results$p)[1]))) - -} - - - -test_that("biweight results"),{ - vec_1 <- c(1,2,3,4) - vec_2 <- c(1,2,3,4) - - results <- BiweightMidCorrelation(vec_1,vec_2) - expect_equal(c(1.0,0.0),results) -} - - -test_that("parsing args "),{ - my_args <- c("1 2 3 4","5 6 7 8") - results <- ParseArgs(my_args) - - expect_equal(results[1],c(1,2,3,4)) - expect_equal(results[2],c(5,6,7,8)) -} - -parsed_values <- ParseArgs(arg_values) - - -cat(BiweightMidCorrelation(parsed_values[1],parsed_values[2])) \ No newline at end of file diff --git a/tests/unit/computations/test_biweight.py b/tests/unit/computations/test_biweight.py deleted file mode 100644 index ad404f1..0000000 --- a/tests/unit/computations/test_biweight.py +++ /dev/null @@ -1,21 +0,0 @@ -"""test for biweight script""" -from unittest import TestCase -from unittest import mock - -from gn3.computations.biweight import calculate_biweight_corr - - -class TestBiweight(TestCase): - """test class for biweight""" - - @mock.patch("gn3.computations.biweight.subprocess.check_output") - def test_calculate_biweight_corr(self, mock_check_output): - """test for calculate_biweight_corr func""" - mock_check_output.return_value = "0.1 0.5" - results = calculate_biweight_corr(command="Rscript", - path_to_script="./r_script.R", - trait_vals=[ - 1.2, 1.1, 1.9], - target_vals=[1.9, 0.4, 1.1]) - - self.assertEqual(results, (0.1, 0.5)) diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py index fc52ec1..96d9c6d 100644 --- a/tests/unit/computations/test_correlation.py +++ b/tests/unit/computations/test_correlation.py @@ -5,7 +5,6 @@ from unittest import mock from collections import namedtuple from gn3.computations.correlations import normalize_values -from gn3.computations.correlations import do_bicor from gn3.computations.correlations import compute_sample_r_correlation from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import filter_shared_sample_keys @@ -98,16 +97,6 @@ class TestCorrelation(TestCase): self.assertEqual(results, expected_results) - @mock.patch("gn3.computations.correlations.calculate_biweight_corr") - def test_bicor(self, mock_biweight): - """Test for doing biweight mid correlation """ - mock_biweight.return_value = (1.0, 0.0) - - results = do_bicor(x_val=[1, 2, 3], y_val=[4, 5, 6]) - - self.assertEqual(results, (1.0, 0.0) - ) - @mock.patch("gn3.computations.correlations.compute_corr_coeff_p_value") @mock.patch("gn3.computations.correlations.normalize_values") def test_compute_sample_r_correlation(self, norm_vals, compute_corr): -- cgit v1.2.3