diff options
Diffstat (limited to 'gn3/computations')
-rw-r--r-- | gn3/computations/partial_correlations.py | 85 |
1 files changed, 78 insertions, 7 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 07dc16d..07a67be 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -5,11 +5,14 @@ It is an attempt to migrate over the partial correlations feature from GeneNetwork1. """ +import math from functools import reduce -from typing import Any, Tuple, Sequence +from typing import Any, Tuple, Union, Sequence from scipy.stats import pearsonr, spearmanr -from gn3.settings import TEXTDIR +import pandas + +from gn3.settings import TEXTDIR, ROUND_TO from gn3.data_helpers import parse_csv_line def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): @@ -258,10 +261,27 @@ def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914] else fetched_correlations[corr[0]][0:2]) for idx, corr in enumerate(all_correlations)) +def build_data_frame( + xdata: Tuple[float, ...], ydata: Tuple[float, ...], + zdata: Union[ + Tuple[float, ...], + Tuple[Tuple[float, ...], ...]]) -> pandas.DataFrame: + """ + Build a pandas DataFrame object from xdata, ydata and zdata + """ + x_y_df = pandas.DataFrame({"x": xdata, "y": ydata}) + if isinstance(zdata[0], float): + return x_y_df.join(pandas.DataFrame({"z": zdata})) + interm_df = x_y_df.join(pandas.DataFrame( + {"z{}".format(i): val for i, val in enumerate(row)} for row in zdata)) + if interm_df.shape[1] == 3: + return interm_df.rename(columns={"z0": "z"}) + return interm_df + def partial_correlation_matrix( xdata: Tuple[float, ...], ydata: Tuple[float, ...], - zdata: Tuple[float, ...], method: str = "pearsons", - omit_nones: bool = True) -> float: + zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]], + method: str = "pearson", omit_nones: bool = True) -> float: """ Computes the partial correlation coefficient using the 'variance-covariance matrix' method @@ -275,8 +295,8 @@ def partial_correlation_matrix( def partial_correlation_recursive( xdata: Tuple[float, ...], ydata: Tuple[float, ...], - zdata: Tuple[float, ...], method: str = "pearsons", - omit_nones: bool = True) -> float: + zdata: Union[Tuple[float, ...], Tuple[Tuple[float, ...], ...]], + method: str = "pearson", omit_nones: bool = True) -> float: """ Computes the partial correlation coefficient using the 'recursive formula' method @@ -286,4 +306,55 @@ def partial_correlation_recursive( GeneNetwork1, specifically the `pcor.rec` function written in the R programming language. """ - return 0 + assert method in ("pearson", "spearman", "kendall") + data = ( + build_data_frame(xdata, ydata, zdata).dropna(axis=0) + if omit_nones else + build_data_frame(xdata, ydata, zdata)) + + if data.shape[1] == 3: # z is a vector, not matrix + fields = { + "rxy": ("x", "y"), + "rxz": ("x", "z"), + "ryz": ("y", "z")} + tdata = { + corr_type: pandas.DataFrame( + {cols[0]: data[cols[0]], + cols[1]: data[cols[1]]}).dropna(axis=0) + for corr_type, cols in fields.items() + } + corrs = { + corr_type: tdata[corr_type][cols[0]].corr( + tdata[corr_type][cols[1]], method=method) + for corr_type, cols in fields.items() + } + return round(( + (corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) / + (math.sqrt(1 - corrs["rxz"]**2) * + math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO) + + remaining_cols = [ + colname for colname, series in data.items() + if colname not in ("x", "y", "z0") + ] + + new_xdata = tuple(data["x"]) + new_ydata = tuple(data["y"]) + zc = tuple( + tuple(row_series[1]) + for row_series in data[remaining_cols].iterrows()) + + rxy_zc = partial_correlation_recursive( + new_xdata, new_ydata, zc, method=method, + omit_nones=omit_nones) + rxz0_zc = partial_correlation_recursive( + new_xdata, tuple(data["z0"]), zc, method=method, + omit_nones=omit_nones) + ryz0_zc = partial_correlation_recursive( + new_ydata, tuple(data["z0"]), zc, method=method, + omit_nones=omit_nones) + + return round( + ((rxy_zc - rxz0_zc * ryz0_zc) /( + math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2))), + ROUND_TO) |