about summary refs log tree commit diff
path: root/gn3/computations
diff options
context:
space:
mode:
author: Frederick Muriuki Muriithi, 2021-11-04 12:38:27 +0300
committer: Frederick Muriuki Muriithi, 2021-11-04 12:38:27 +0300
commit: 0357f5c5e6eeb146eb259337019c87079363a256 (patch)
tree: fb6d03e8120ca983473568dec214a92251386c65 /gn3/computations
parent: 32e6d788ac5b6fa8daf4c26b2ad7bca32d71d828 (diff)
download: genenetwork3-0357f5c5e6eeb146eb259337019c87079363a256.tar.gz
Implement `build_data_frame`
Issue:
https://github.com/genenetwork/gn-gemtext-threads/blob/main/topics/gn1-migration-to-gn2/partial-correlations.gmi

* gn3/computations/partial_correlations.py: new function (`build_data_frame`)
* tests/unit/computations/test_partial_correlations.py: Add tests for new
  function

  Add a new function to build a pandas DataFrame object from the provided
  values:

  - x: a vector of floats (represented with a tuple of floats)
  - y: a vector of floats (represented with a tuple of floats)
  - z: a vector OR matrix of floats (represented with a tuple of floats or a
       tuple of tuples of floats)
Diffstat (limited to 'gn3/computations')
-rw-r--r-- gn3/computations/partial_correlations.py 16
1 file changed, 16 insertions, 0 deletions
diff --git a/gn3/computations/partial_correlations.py b/gn3/computations/partial_correlations.py
index 07dc16d..ffdf0c5 100644
--- a/gn3/computations/partial_correlations.py
+++ b/gn3/computations/partial_correlations.py
@@ -10,6 +10,8 @@ from typing import Any, Tuple, Sequence
 from scipy.stats import pearsonr, spearmanr
 
 from gn3.settings import TEXTDIR
+import pandas
+
 from gn3.data_helpers import parse_csv_line
 
 def control_samples(controls: Sequence[dict], sampleslist: Sequence[str]):
@@ -258,6 +260,20 @@ def compute_partial_correlations_fast(# pylint: disable=[R0913, R0914]
             else fetched_correlations[corr[0]][0:2])
         for idx, corr in enumerate(all_correlations))
 
+def build_data_frame(
+        xdata: Tuple[float, ...], ydata: Tuple[float, ...],
+        zdata: Union[
+            Tuple[float, ...],
+            Tuple[Tuple[float, ...], ...]]) -> pandas.DataFrame:
+    """Build a DataFrame with columns `x`, `y` and `z` (or `z0`, `z1`, ...).
+    If the first item of `zdata` is a float, `zdata` becomes the single `z`
+    column; else each inner tuple yields one dict keyed `z<i>` by position."""
+    x_y_df = pandas.DataFrame({"x": xdata, "y": ydata})
+    if isinstance(zdata[0], float):
+        return x_y_df.join(pandas.DataFrame({"z": zdata}))
+    return x_y_df.join(pandas.DataFrame(
+        {"z{}".format(i): val for i, val in enumerate(row)} for row in zdata))
+
 def partial_correlation_matrix(
         xdata: Tuple[float, ...], ydata: Tuple[float, ...],
         zdata: Tuple[float, ...], method: str = "pearsons",