aboutsummaryrefslogtreecommitdiff
path: root/gn3/computations
diff options
context:
space:
mode:
Diffstat (limited to 'gn3/computations')
-rw-r--r--gn3/computations/partial_correlations.py85
1 files changed, 78 insertions, 7 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 07dc16d..07a67be 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -5,11 +5,14 @@ It is an attempt to migrate over the partial correlations feature from
GeneNetwork1.
"""
+import math
from functools import reduce
-from typing import Any, Tuple, Sequence
+from typing import Any, Tuple, Union, Sequence
from scipy.stats import pearsonr, spearmanr
-from gn3.settings import TEXTDIR
+import pandas
+
+from gn3.settings import TEXTDIR, ROUND_TO
from gn3.data_helpers import parse_csv_line
def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
@@ -258,10 +261,27 @@ def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914]
else fetched_correlations[corr[0]][0:2])
for idx, corr in enumerate(all_correlations))
+def build_data_frame(
+ xdata: Tuple[float, ...], ydata: Tuple[float, ...],
+ zdata: Union[
+ Tuple[float, ...],
+ Tuple[Tuple[float, ...], ...]]) -> pandas.DataFrame:
+ """
+ Build a pandas DataFrame object from xdata, ydata and zdata
+ """
+ x_y_df = pandas.DataFrame({"x": xdata, "y": ydata})
+ if isinstance(zdata[0], float):
+ return x_y_df.join(pandas.DataFrame({"z": zdata}))
+ interm_df = x_y_df.join(pandas.DataFrame(
+ {"z{}".format(i): val for i, val in enumerate(row)} for row in zdata))
+ if interm_df.shape[1] == 3:
+ return interm_df.rename(columns={"z0": "z"})
+ return interm_df
+
def partial_correlation_matrix(
xdata: Tuple[float, ...], ydata: Tuple[float, ...],
- zdata: Tuple[float, ...], method: str = "pearsons",
- omit_nones: bool = True) -> float:
+ zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]],
+ method: str = "pearson", omit_nones: bool = True) -> float:
"""
Computes the partial correlation coefficient using the
'variance-covariance matrix' method
@@ -275,8 +295,8 @@ def partial_correlation_matrix(
def partial_correlation_recursive(
xdata: Tuple[float, ...], ydata: Tuple[float, ...],
- zdata: Tuple[float, ...], method: str = "pearsons",
- omit_nones: bool = True) -> float:
+ zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]],
+ method: str = "pearson", omit_nones: bool = True) -> float:
"""
Computes the partial correlation coefficient using the 'recursive formula'
method
@@ -286,4 +306,55 @@ def partial_correlation_recursive(
GeneNetwork1, specifically the `pcor.rec` function written in the R
programming language.
"""
- return 0
+ assert method in ("pearson", "spearman", "kendall")
+ data = (
+ build_data_frame(xdata, ydata, zdata).dropna(axis=0)
+ if omit_nones else
+ build_data_frame(xdata, ydata, zdata))
+
+ if data.shape[1] == 3: # z is a vector, not matrix
+ fields = {
+ "rxy": ("x", "y"),
+ "rxz": ("x", "z"),
+ "ryz": ("y", "z")}
+ tdata = {
+ corr_type: pandas.DataFrame(
+ {cols[0]: data[cols[0]],
+ cols[1]: data[cols[1]]}).dropna(axis=0)
+ for corr_type, cols in fields.items()
+ }
+ corrs = {
+ corr_type: tdata[corr_type][cols[0]].corr(
+ tdata[corr_type][cols[1]], method=method)
+ for corr_type, cols in fields.items()
+ }
+ return round((
+ (corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) /
+ (math.sqrt(1 - corrs["rxz"]**2) *
+ math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO)
+
+ remaining_cols = [
+ colname for colname, series in data.items()
+ if colname not in ("x", "y", "z0")
+ ]
+
+ new_xdata = tuple(data["x"])
+ new_ydata = tuple(data["y"])
+ zc = tuple(
+ tuple(row_series[1])
+ for row_series in data[remaining_cols].iterrows())
+
+ rxy_zc = partial_correlation_recursive(
+ new_xdata, new_ydata, zc, method=method,
+ omit_nones=omit_nones)
+ rxz0_zc = partial_correlation_recursive(
+ new_xdata, tuple(data["z0"]), zc, method=method,
+ omit_nones=omit_nones)
+ ryz0_zc = partial_correlation_recursive(
+ new_ydata, tuple(data["z0"]), zc, method=method,
+ omit_nones=omit_nones)
+
+ return round(
+ ((rxy_zc - rxz0_zc * ryz0_zc) /(
+ math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2))),
+ ROUND_TO)