optimization for sample correlation

author: Alexander Kabui 2021-04-15 02:17:30 +0300
committer: Alexander Kabui 2021-04-15 02:17:30 +0300
commit: f3f68f8eb92c7ec9c42bc20bc8e94c435cc745e2 (patch)
tree: b9c8cef828e7dc1f19a85da3807ea0ad77bd690a /gn3/computations
parent: ff80b5228e741c24d66d1d1c13702a34aac77a78 (diff)
download: genenetwork3-f3f68f8eb92c7ec9c42bc20bc8e94c435cc745e2.tar.gz
1 files changed, 22 insertions, 29 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 7fb67be..fb62b56 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -1,4 +1,6 @@
 """module contains code for correlations"""
+import multiprocessing
+
 from typing import List
 from typing import Tuple
 from typing import Optional
@@ -7,11 +9,6 @@ from typing import Callable
 import scipy.stats
 
 
-def compute_sum(rhs: int, lhs: int) -> int:
-    """Initial tests to compute sum of two numbers"""
-    return rhs + lhs
-
-
 def map_shared_keys_to_values(target_sample_keys: List, target_sample_vals: dict)-> List:
     """Function to construct target dataset data items given commoned shared\
     keys and trait samplelist values for example given keys  >>>>>>>>>>\
@@ -73,14 +70,12 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
     return (corr_coeffient, p_val)
 
 
-def compute_sample_r_correlation(
-        corr_method: str, trait_vals,
-        target_samples_vals) -> Optional[Tuple[float, float, int]]:
+def compute_sample_r_correlation(corr_method, trait_vals,
+                                 target_samples_vals) -> Optional[Tuple[float, float, int]]:
     """Given a primary trait values and target trait values calculate the
     correlation coeff and p value
 
     """
-
     (sanitized_traits_vals, sanitized_target_vals,
      num_overlap) = normalize_values(trait_vals, target_samples_vals)
 
@@ -127,35 +122,33 @@ def compute_all_sample_correlation(this_trait,
     """Given a trait data samplelist and\
     target__datasets compute all sample correlation
     """
+    # TODO: fix the trait name key; every result is currently stored under the same placeholder key
 
     this_trait_samples = this_trait["trait_sample_data"]
-
     corr_results = []
-
+    processed_values = []
     for target_trait in target_dataset:
-        trait_id = target_trait.get("trait_id")
+        # trait_id = target_trait.get("trait_id")
         target_trait_data = target_trait["trait_sample_data"]
-        this_vals, target_vals = filter_shared_sample_keys(
-            this_trait_samples, target_trait_data)
-
-        sample_correlation = compute_sample_r_correlation(
-            corr_method=corr_method,
-            trait_vals=this_vals,
-            target_samples_vals=target_vals)
+        # this_vals, target_vals = filter_shared_sample_keys(
+        #     this_trait_samples, target_trait_data)
 
-        if sample_correlation is not None:
-            (corr_coeffient, p_value, num_overlap) = sample_correlation
+        processed_values.append((corr_method, *filter_shared_sample_keys(
+            this_trait_samples, target_trait_data)))
+    with multiprocessing.Pool() as pool:
+        results = pool.starmap(compute_sample_r_correlation, processed_values)
 
-        else:
-            continue
+        for sample_correlation in results:
+            if sample_correlation is not None:
+                (corr_coeffient, p_value, num_overlap) = sample_correlation
 
-        corr_result = {
-            "corr_coeffient": corr_coeffient,
-            "p_value": p_value,
-            "num_overlap": num_overlap
-        }
+                corr_result = {
+                    "corr_coeffient": corr_coeffient,
+                    "p_value": p_value,
+                    "num_overlap": num_overlap
+                }
 
-        corr_results.append({trait_id: corr_result})
+                corr_results.append({"trait_name_key": corr_result})
 
     return corr_results
author	Alexander Kabui	2021-04-15 02:17:30 +0300
committer	Alexander Kabui	2021-04-15 02:17:30 +0300
commit	f3f68f8eb92c7ec9c42bc20bc8e94c435cc745e2 (patch)
tree	b9c8cef828e7dc1f19a85da3807ea0ad77bd690a /gn3/computations
parent	ff80b5228e741c24d66d1d1c13702a34aac77a78 (diff)
download	genenetwork3-f3f68f8eb92c7ec9c42bc20bc8e94c435cc745e2.tar.gz