about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-11-04 12:43:28 +0300
committer: Frederick Muriuki Muriithi, 2021-11-04 12:43:28 +0300
commit: 9647226ea4c85449581df713c2bb583aeed6940f (patch)
tree: 15fa41d729552af2dac2843f91fa792241102061
parent: 0357f5c5e6eeb146eb259337019c87079363a256 (diff)
download: genenetwork3-9647226ea4c85449581df713c2bb583aeed6940f.tar.gz
Partially implement `partial_correlation_recursive`
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/computations/partial_correlations.py: Implement one path for the
  `gn3.computations.partial_correlations.partial_correlation_recursive`
  function.
* gn3/settings.py: Add the `ROUND_TO` setting, the number of decimal places
  that computed results are rounded to.
* tests/unit/computations/test_partial_correlations.py: Update the test to
  round the expected results to `ROUND_TO` decimal places before comparison.

  Implement a single path (where the z value is a vector and not a matrix) for
  the `partial_correlation_recursive` function.
-rw-r--r-- gn3/computations/partial_correlations.py 41
-rw-r--r-- gn3/settings.py 2
-rw-r--r-- tests/unit/computations/test_partial_correlations.py 4
3 files changed, 39 insertions, 8 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index ffdf0c5..bd127a7 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -5,13 +5,14 @@ It is an attempt to migrate over the partial correlations feature from
 GeneNetwork1.
 """
 
+import math
 from functools import reduce
-from typing import Any, Tuple, Sequence
+from typing import Any, Tuple, Union, Sequence
 from scipy.stats import pearsonr, spearmanr
 
-from gn3.settings import TEXTDIR
 import pandas
 
+from gn3.settings import TEXTDIR, ROUND_TO
 from gn3.data_helpers import parse_csv_line
 
 def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
@@ -276,8 +277,8 @@ def build_data_frame(
 
 def partial_correlation_matrix(
         xdata: Tuple[float, ...], ydata: Tuple[float, ...],
-        zdata: Tuple[float, ...], method: str = "pearsons",
-        omit_nones: bool = True) -> float:
+        zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]],
+        method: str = "pearson", omit_nones: bool = True) -> float:
     """
     Computes the partial correlation coefficient using the
     'variance-covariance matrix' method
@@ -291,8 +292,8 @@ def partial_correlation_matrix(
 
 def partial_correlation_recursive(
         xdata: Tuple[float, ...], ydata: Tuple[float, ...],
-        zdata: Tuple[float, ...], method: str = "pearsons",
-        omit_nones: bool = True) -> float:
+        zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]],
+        method: str = "pearson", omit_nones: bool = True) -> float:
     """
     Computes the partial correlation coefficient using the 'recursive formula'
     method
@@ -302,4 +303,30 @@ def partial_correlation_recursive(
     GeneNetwork1, specifically the `pcor.rec` function written in the R
     programming language.
     """
-    return 0
+    assert method in ("pearson", "spearman", "kendall")
+    data = (
+        build_data_frame(xdata, ydata, zdata).dropna(axis=0)
+        if omit_nones else
+        build_data_frame(xdata, ydata, zdata))
+
+    if data.shape[1] == 3: # z is a vector, not matrix
+        fields = {
+            "rxy": ("x", "y"),
+            "rxz": ("x", "z"),
+            "ryz": ("y", "z")}
+        tdata = {
+            corr_type: pandas.DataFrame(
+                {cols[0]: data[cols[0]],
+                 cols[1]: data[cols[1]]}).dropna(axis=0)
+            for corr_type, cols in fields.items()
+        }
+        corrs = {
+            corr_type: tdata[corr_type][cols[0]].corr(
+                tdata[corr_type][cols[1]], method=method)
+            for corr_type, cols in fields.items()
+        }
+        return round((
+            (corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) /
+            (math.sqrt(1 - corrs["rxz"]**2) *
+             math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO)
+    return round(0, ROUND_TO)
diff --git a/gn3/settings.py b/gn3/settings.py
index 57c63df..eaf8f23 100644
--- a/gn3/settings.py
+++ b/gn3/settings.py
@@ -53,3 +53,5 @@ CORS_HEADERS = [
 
 GNSHARE = os.environ.get("GNSHARE", "/gnshare/gn/")
 TEXTDIR = f"{GNSHARE}/web/ProbeSetFreeze_DataMatrix"
+
+ROUND_TO = 10
diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py
index b22bc62..981801a 100644
--- a/tests/unit/computations/test_partial_correlations.py
+++ b/tests/unit/computations/test_partial_correlations.py
@@ -4,6 +4,8 @@ import csv
 from unittest import TestCase
 
 import pandas
+
+from gn3.settings import ROUND_TO
 from gn3.computations.partial_correlations import (
     fix_samples,
     control_samples,
@@ -115,7 +117,7 @@ def parse_test_data_csv(filename):
         "z": __str__to_tuple(line, "z"),
         "method": methods[line["method"]],
         "rm": line["rm"] == "TRUE",
-        "result": float(line["result"])
+        "result": round(float(line["result"]), ROUND_TO)
     } for line in lines)
 
 class TestPartialCorrelations(TestCase):