aboutsummaryrefslogtreecommitdiff
path: root/gn3
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-11-15 07:58:10 +0300
committer: Frederick Muriuki Muriithi, 2021-11-15 07:58:10 +0300
commit: 29fc003070b45f61e7ab1048a818201b5beb9298 (patch)
tree: 9e8be7a4c7b41d0e61cadf9a1d140fac86f4b425 /gn3
parent: 7d3929486142481ba0fe36f302845accf8b497ee (diff)
download: genenetwork3-29fc003070b45f61e7ab1048a818201b5beb9298.tar.gz
Fix bugs in recursive partial correlations
* gn3/computations/partial_correlations.py: Remove rounding. Fix computation of remaining covariates.
* tests/unit/computations/partial_correlations_test_data/pcor_rec_blackbox_test.txt: Reduce the number of covariates to between one (1) and three (3).
* tests/unit/computations/test_partial_correlations.py: Fix some minor bugs.

It turns out that the computational complexity increases exponentially with the number of covariates. Therefore, to get a somewhat sensible test time while retaining a large-ish number of tests, this commit reduces the number of covariates to between 1 and 3.
Diffstat (limited to 'gn3')
-rw-r--r-- gn3/computations/partial_correlations.py | 16
1 files changed, 6 insertions, 10 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 156e74c..519dce9 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -12,7 +12,7 @@ from scipy.stats import pearsonr, spearmanr
import pandas
-from gn3.settings import TEXTDIR, ROUND_TO
+from gn3.settings import TEXTDIR
from gn3.data_helpers import parse_csv_line
def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
@@ -328,10 +328,10 @@ def partial_correlation_recursive(
tdata[corr_type][cols[1]], method=method)
for corr_type, cols in fields.items()
}
- return round((
+ return (
(corrs["rxy"] - corrs["rxz"] * corrs["ryz"]) /
(math.sqrt(1 - corrs["rxz"]**2) *
- math.sqrt(1 - corrs["ryz"]**2))), ROUND_TO)
+ math.sqrt(1 - corrs["ryz"]**2)))
remaining_cols = [
colname for colname, series in data.items()
@@ -340,9 +340,7 @@ def partial_correlation_recursive(
new_xdata = tuple(data["x"])
new_ydata = tuple(data["y"])
- zc = tuple(
- tuple(row_series[1])
- for row_series in data[remaining_cols].iterrows())
+ zc = tuple(tuple(data[colname]) for colname in data[remaining_cols].columns)
rxy_zc = partial_correlation_recursive(
new_xdata, new_ydata, zc, method=method,
@@ -354,7 +352,5 @@ def partial_correlation_recursive(
new_ydata, tuple(data["z0"]), zc, method=method,
omit_nones=omit_nones)
- return round(
- ((rxy_zc - rxz0_zc * ryz0_zc) /(
- math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2))),
- ROUND_TO)
+ return ((rxy_zc - rxz0_zc * ryz0_zc) /(
+ math.sqrt(1 - rxz0_zc**2) * math.sqrt(1 - ryz0_zc**2)))