diff options
author | Frederick Muriuki Muriithi | 2021-11-15 07:58:10 +0300 |
---|---|---|
committer | Frederick Muriuki Muriithi | 2021-11-15 07:58:10 +0300 |
commit | 29fc003070b45f61e7ab1048a818201b5beb9298 (patch) | |
tree | 9e8be7a4c7b41d0e61cadf9a1d140fac86f4b425 /gn3/computations | |
parent | 7d3929486142481ba0fe36f302845accf8b497ee (diff) | |
download | genenetwork3-29fc003070b45f61e7ab1048a818201b5beb9298.tar.gz |
Fix bugs in recursive partial correlations
* gn3/computations/partial_correlations.py: Remove rounding. Fix computation
of remaining covariates
* tests/unit/computations/partial_correlations_test_data/pcor_rec_blackbox_test.txt:
reduce the number of covariates to between one (1) and three (3)
* tests/unit/computations/test_partial_correlations.py: fix some minor bugs
It turns out that the computational complexity increases exponentially with
the number of covariates. Therefore, to get a somewhat sensible test time
while retaining a large-ish number of tests, this commit reduces the number
of covariates to between 1 and 3.
Diffstat (limited to 'gn3/computations')
-rw-r--r-- | gn3/computations/partial_correlations.py | 16 |
1 files changed, 6 insertions, 10 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py index 156e74c..519dce9 100644 --- a/gn3/computations/partial_correlations.py +++ b/gn3/computations/partial_correlations.py @@ -12,7 +12,7 @@ from scipy.stats import pearsonr, spearmanr import pandas -from gn3.settings import TEXTDIR, ROUND_TO +from gn3.settings import TEXTDIR from gn3.data_helpers import parse_csv_line def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]): @@ -328,10 +328,10 @@ def partial_correlation_recursive( tdata[corr_type][cols[1]], method=method) for corr_type, cols in fields.items() } - return round(( + return ( (corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) / (math.sqrt(1 - corrs["rxz"]**2) * - math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO) + math.sqrt(1 - corrs["ryz"]**2))) remaining_cols = [ colname for colname, series in data.items() @@ -340,9 +340,7 @@ def partial_correlation_recursive( new_xdata = tuple(data["x"]) new_ydata = tuple(data["y"]) - zc = tuple( - tuple(row_series[1]) - for row_series in data[remaining_cols].iterrows()) + zc = tuple(tuple(data[colname]) for colname in data[remaining_cols].columns) rxy_zc = partial_correlation_recursive( new_xdata, new_ydata, zc, method=method, @@ -354,7 +352,5 @@ def partial_correlation_recursive( new_ydata, tuple(data["z0"]), zc, method=method, omit_nones=omit_nones) - return round( - ((rxy_zc - rxz0_zc * ryz0_zc) /( - math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2))), - ROUND_TO) + return ((rxy_zc - rxz0_zc * ryz0_zc) /( + math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2))) |