From 05740a60d6616f28751f96ca30adeb524f4369ad Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Mon, 1 Nov 2021 10:49:35 +0300
Subject: Implement `compute_partial_correlations_fast`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* Implement `compute_partial_correlations_fast` that is a partial migration of
  `web.webqtl.correlation.PartialCorrDBPage.getPartialCorrelationsFast` in
  GN1.
  This function will probably be reworked once the dependencies are fully
  migrated.

  It also needs tests to be added.
---
 gn3/settings.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'gn3/settings.py')

diff --git a/gn3/settings.py b/gn3/settings.py
index e85eeff..57c63df 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -50,3 +50,6 @@ CORS_HEADERS = [
     "Authorization",
     "Access-Control-Allow-Credentials"
 ]
+
+GNSHARE = os.environ.get("GNSHARE", "/gnshare/gn/")
+TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix"
-- 
cgit v1.2.3

From 9647226ea4c85449581df713c2bb583aeed6940f Mon Sep 17 00:00:00 2001
From: Frederick Muriuki Muriithi
Date: Thu, 4 Nov 2021 12:43:28 +0300
Subject: Partially implement `partial_correlation_recursive`

Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/computations/partial_correlations.py: Implement one path for the
  `gn3.computations.partial_correlations.partial_correlation_recursive`
  function.
* gn3/settings.py: Add a setting for how many decimal places to round to
* tests/unit/computations/test_partial_correlations.py: Update test to take
  the number of decimal places into consideration

Implement a single path (where the z value is a vector and not a matrix) for
the `partial_correlation_recursive` function.
---
 gn3/computations/partial_correlations.py           | 41 ++++++++++++++++++----
 gn3/settings.py                                    |  2 ++
 .../unit/computations/test_partial_correlations.py |  4 ++-
 3 files changed, 39 insertions(+), 8 deletions(-)

(limited to 'gn3/settings.py')

diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index ffdf0c5..bd127a7 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -5,13 +5,14 @@ It is an attempt to migrate over the partial correlations feature from
 GeneNetwork1.
 """
 
+import math
 from functools import reduce
-from typing import Any, Tuple, Sequence
+from typing import Any, Tuple, Union, Sequence
 from scipy.stats import pearsonr, spearmanr
-from gn3.settings import TEXTDIR
 
 import pandas
 
+from gn3.settings import TEXTDIR, ROUND_TO
 from gn3.data_helpers import parse_csv_line
 
 def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
@@ -276,8 +277,8 @@ def build_data_frame(
 
 def partial_correlation_matrix(
         xdata: Tuple[float, ...], ydata: Tuple[float, ...],
-        zdata: Tuple[float, ...], method: str = "pearsons",
-        omit_nones: bool = True) -> float:
+        zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]],
+        method: str = "pearson", omit_nones: bool = True) -> float:
     """
     Computes the partial correlation coefficient using the
     'variance-covariance matrix' method
@@ -291,8 +292,8 @@ def partial_correlation_matrix(
 
 def partial_correlation_recursive(
         xdata: Tuple[float, ...], ydata: Tuple[float, ...],
-        zdata: Tuple[float, ...], method: str = "pearsons",
-        omit_nones: bool = True) -> float:
+        zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]],
+        method: str = "pearson", omit_nones: bool = True) -> float:
     """
     Computes the partial correlation coefficient using the 'recursive formula'
     method
@@ -302,4 +303,30 @@ def partial_correlation_recursive(
     GeneNetwork1, specifically the `pcor.rec` function written in the R
     programming language.
     """
-    return 0
+    assert method in ("pearson", "spearman", "kendall")
+    data = (
+        build_data_frame(xdata, ydata, zdata).dropna(axis=0)
+        if omit_nones else
+        build_data_frame(xdata, ydata, zdata))
+
+    if data.shape[1] == 3: # z is a vector, not matrix
+        fields = {
+            "rxy": ("x", "y"),
+            "rxz": ("x", "z"),
+            "ryz": ("y", "z")}
+        tdata = {
+            corr_type: pandas.DataFrame(
+                {cols[0]: data[cols[0]],
+                 cols[1]: data[cols[1]]}).dropna(axis=0)
+            for corr_type, cols in fields.items()
+        }
+        corrs = {
+            corr_type: tdata[corr_type][cols[0]].corr(
+                tdata[corr_type][cols[1]], method=method)
+            for corr_type, cols in fields.items()
+        }
+        return round((
+            (corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) /
+            (math.sqrt(1 - corrs["rxz"]**2) *
+             math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO)
+    return round(0, ROUND_TO)
diff --git a/gn3/settings.py b/gn3/settings.py
index 57c63df..eaf8f23 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -53,3 +53,5 @@ CORS_HEADERS = [
 
 GNSHARE = os.environ.get("GNSHARE", "/gnshare/gn/")
 TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix"
+
+ROUND_TO = 10
diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py
index b22bc62..981801a 100644
--- a/tests/unit/computations/test_partial_correlations.py
+++ b/tests/unit/computations/test_partial_correlations.py
@@ -4,6 +4,8 @@ import csv
 from unittest import TestCase
 
 import pandas
+
+from gn3.settings import ROUND_TO
 from gn3.computations.partial_correlations import (
     fix_samples,
     control_samples,
@@ -115,7 +117,7 @@ def parse_test_data_csv(filename):
         "z": __str__to_tuple(line, "z"),
         "method": methods[line["method"]],
         "rm": line["rm"] == "TRUE",
-        "result": float(line["result"])
+        "result": round(float(line["result"]), ROUND_TO)
     } for line in lines)
 
 class TestPartialCorrelations(TestCase):
-- 
cgit v1.2.3