aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFrederick Muriuki Muriithi2021-11-04 12:38:27 +0300
committerFrederick Muriuki Muriithi2021-11-04 12:38:27 +0300
commit0357f5c5e6eeb146eb259337019c87079363a256 (patch)
treefb6d03e8120ca983473568dec214a92251386c65
parent32e6d788ac5b6fa8daf4c26b2ad7bca32d71d828 (diff)
downloadgenenetwork3-0357f5c5e6eeb146eb259337019c87079363a256.tar.gz
Implement `build_data_frame`
Issue: https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/computations/partial_correlations.py: New function (`build_data_frame`).
* tests/unit/computations/test_partial_correlations.py: Add tests for the new function.

Add a new function to build a pandas DataFrame object from the provided values:
- x: a vector of floats (represented with a tuple of floats)
- y: a vector of floats (represented with a tuple of floats)
- z: a vector OR matrix of floats (represented with a tuple of floats or a tuple of tuples of floats)
-rw-r--r--gn3/computations/partial_correlations.py16
-rw-r--r--tests/unit/computations/test_partial_correlations.py22
2 files changed, 38 insertions, 0 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 07dc16d..ffdf0c5 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -10,6 +10,8 @@ from typing import Any, Tuple, Sequence
from scipy.stats import pearsonr, spearmanr
from gn3.settings import TEXTDIR
+import pandas
+
from gn3.data_helpers import parse_csv_line
def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
@@ -258,6 +260,20 @@ def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914]
else fetched_correlations[corr[0]][0:2])
for idx, corr in enumerate(all_correlations))
def build_data_frame(
        xdata: Tuple[float, ...], ydata: Tuple[float, ...],
        zdata: Union[
            Tuple[float, ...],
            Tuple[Tuple[float, ...], ...]]) -> pandas.DataFrame:
    """
    Build a pandas DataFrame object from xdata, ydata and zdata.

    `xdata` and `ydata` become the "x" and "y" columns respectively.

    `zdata` is either:
    - a vector of numbers, which becomes a single "z" column, or
    - a matrix (a sequence of rows of numbers), in which case the i-th
      element of every row goes into column "z<i>" ("z0", "z1", ...).

    If `zdata` is empty, the frame holds only the "x" and "y" columns.

    The z column(s) are attached with `DataFrame.join`, i.e. they are
    aligned to the x/y rows on the (default, positional) row index.
    """
    x_y_df = pandas.DataFrame({"x": xdata, "y": ydata})
    # With no z values, `zdata[0]` below would raise IndexError.
    if len(zdata) == 0:
        return x_y_df
    # Accept ints as well as floats: a vector such as (1, 2, 3) must not be
    # mistaken for a matrix, whose rows are then iterated over.
    if isinstance(zdata[0], (int, float)):
        return x_y_df.join(pandas.DataFrame({"z": zdata}))
    # Each row of the matrix becomes one record; its i-th element is "z<i>".
    return x_y_df.join(pandas.DataFrame([
        {"z{}".format(idx): val for idx, val in enumerate(row)}
        for row in zdata]))
+
def partial_correlation_matrix(
xdata: Tuple[float, ...], ydata: Tuple[float, ...],
zdata: Tuple[float, ...], method: str = "pearsons",
diff --git a/tests/unit/computations/test_partial_correlations.py b/tests/unit/computations/test_partial_correlations.py
index c5c35d1..b22bc62 100644
--- a/tests/unit/computations/test_partial_correlations.py
+++ b/tests/unit/computations/test_partial_correlations.py
@@ -2,9 +2,12 @@
import csv
from unittest import TestCase
+
+import pandas
from gn3.computations.partial_correlations import (
fix_samples,
control_samples,
+ build_data_frame,
dictify_by_samples,
tissue_correlation,
find_identical_traits,
@@ -297,6 +300,25 @@ class TestPartialCorrelations(TestCase):
("a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l")),
(0, 4, 8, 10))
def test_build_data_frame(self):
    """
    Verify `build_data_frame` for both a vector and a matrix `zdata`.

    A vector of z values must end up in a single "z" column, while a
    matrix must be spread over the "z0", "z1", ... columns.
    """
    xdata = (0.1, 1.1, 2.1)
    ydata = (2.1, 3.1, 4.1)
    cases = (
        # zdata given as a vector
        ((5.1, 6.1, 7.1),
         pandas.DataFrame({
             "x": (0.1, 1.1, 2.1), "y": (2.1, 3.1, 4.1),
             "z": (5.1, 6.1, 7.1)})),
        # zdata given as a matrix
        (((5.1, 6.1, 7.1), (5.2, 6.2, 7.2), (5.3, 6.3, 7.3)),
         pandas.DataFrame({
             "x": (0.1, 1.1, 2.1), "y": (2.1, 3.1, 4.1),
             "z0": (5.1, 5.2, 5.3), "z1": (6.1, 6.2, 6.3),
             "z2": (7.1, 7.2, 7.3)})),
    )
    for zdata, expected in cases:
        with self.subTest(xdata=xdata, ydata=ydata, zdata=zdata):
            actual = build_data_frame(xdata, ydata, zdata)
            self.assertTrue(actual.equals(expected))
+
def test_partial_correlation_matrix(self):
"""
Test that `partial_correlation_matrix` computes the appropriate