about summary refs log tree commit diff
path: root/gn3
diff options
context:
space:
mode:
Diffstat (limited to 'gn3')
-rw-r--r-- gn3/computations/correlations.py 27
1 files changed, 16 insertions, 11 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 1e95800..8410995 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -70,8 +70,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
     return (corr_coeffient, p_val)
 
 
-def compute_sample_r_correlation(corr_method, trait_vals,
-                                 target_samples_vals) -> Optional[Tuple[float, float, int]]:
+def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
+                                 target_samples_vals) -> Optional[Tuple[str, float, float, int]]:
     """Given a primary trait values and target trait values calculate the
     correlation coeff and p value
 
@@ -89,7 +89,7 @@ def compute_sample_r_correlation(corr_method, trait_vals,
         # xtodo check if corr_coefficient is None
         # should use numpy.isNan scipy.isNan is deprecated
         if corr_coeffient is not None:
-            return (corr_coeffient, p_value, num_overlap)
+            return (trait_name, corr_coeffient, p_value, num_overlap)
 
     return None
 
@@ -123,24 +123,26 @@ def compute_all_sample_correlation(this_trait,
     target__datasets compute all sample correlation
     """
     # xtodo fix trait_name currently returning single one
+    # pylint: disable-msg=too-many-locals
 
     this_trait_samples = this_trait["trait_sample_data"]
     corr_results = []
     processed_values = []
     for target_trait in target_dataset:
-        # trait_name = target_trait.get("trait_id")
+        trait_name = target_trait.get("trait_id")
         target_trait_data = target_trait["trait_sample_data"]
         # this_vals, target_vals = filter_shared_sample_keys(
         #     this_trait_samples, target_trait_data)
 
-        processed_values.append((corr_method, *filter_shared_sample_keys(
+        processed_values.append((trait_name, corr_method, *filter_shared_sample_keys(
             this_trait_samples, target_trait_data)))
     with multiprocessing.Pool() as pool:
         results = pool.starmap(compute_sample_r_correlation, processed_values)
 
         for sample_correlation in results:
             if sample_correlation is not None:
-                (corr_coeffient, p_value, num_overlap) = sample_correlation
+                (trait_name, corr_coeffient, p_value,
+                 num_overlap) = sample_correlation
 
                 corr_result = {
                     "corr_coeffient": corr_coeffient,
@@ -148,7 +150,7 @@ def compute_all_sample_correlation(this_trait,
                     "num_overlap": num_overlap
                 }
 
-                corr_results.append({"trait_name_key": corr_result})
+                corr_results.append({trait_name: corr_result})
 
     return sorted(
         corr_results,
@@ -158,7 +160,9 @@ def compute_all_sample_correlation(this_trait,
 def benchmark_compute_all_sample(this_trait,
                                  target_dataset,
                                  corr_method="pearson") ->List:
-    """Temp function to benchmark with compute_all_sample_r
+    """Temp function to benchmark with compute_all_sample_r\
+    alternative to compute_all_sample_r where we use \
+    multiprocessing
     """
 
     this_trait_samples = this_trait["trait_sample_data"]
@@ -166,18 +170,19 @@ def benchmark_compute_all_sample(this_trait,
     corr_results = []
 
     for target_trait in target_dataset:
-        trait_id = target_trait.get("trait_id")
+        trait_name = target_trait.get("trait_id")
         target_trait_data = target_trait["trait_sample_data"]
         this_vals, target_vals = filter_shared_sample_keys(
             this_trait_samples, target_trait_data)
 
         sample_correlation = compute_sample_r_correlation(
+            trait_name=trait_name,
             corr_method=corr_method,
             trait_vals=this_vals,
             target_samples_vals=target_vals)
 
         if sample_correlation is not None:
-            (corr_coeffient, p_value, num_overlap) = sample_correlation
+            (trait_name, corr_coeffient, p_value, num_overlap) = sample_correlation
 
         else:
             continue
@@ -188,7 +193,7 @@ def benchmark_compute_all_sample(this_trait,
             "num_overlap": num_overlap
         }
 
-        corr_results.append({trait_id: corr_result})
+        corr_results.append({trait_name: corr_result})
 
     return corr_results