about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander Kabui 2021-04-18 23:52:04 +0300
committer Alexander Kabui 2021-04-18 23:52:04 +0300
commit d266ca9d59093c253ce7b56f9a14119869eb0003 (patch)
tree af3fde7e4ad9ec7b34c8ec6811940e97e8d60d19
parent ba1ea53443b8085700df2941e68421bcc8206c8b (diff)
download genenetwork3-d266ca9d59093c253ce7b56f9a14119869eb0003.tar.gz
refactor:return trait_name in corr_results
-rw-r--r-- gn3/computations/correlations.py 27
-rw-r--r-- tests/unit/computations/test_correlation.py 15
2 files changed, 25 insertions, 17 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 1e95800..8410995 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -70,8 +70,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
     return (corr_coeffient, p_val)
 
 
-def compute_sample_r_correlation(corr_method, trait_vals,
-                                 target_samples_vals) -> Optional[Tuple[float, float, int]]:
+def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
+                                 target_samples_vals) -> Optional[Tuple[str, float, float, int]]:
     """Given a primary trait values and target trait values calculate the
     correlation coeff and p value
 
@@ -89,7 +89,7 @@ def compute_sample_r_correlation(corr_method, trait_vals,
         # xtodo check if corr_coefficient is None
         # should use numpy.isNan scipy.isNan is deprecated
         if corr_coeffient is not None:
-            return (corr_coeffient, p_value, num_overlap)
+            return (trait_name, corr_coeffient, p_value, num_overlap)
 
     return None
 
@@ -123,24 +123,26 @@ def compute_all_sample_correlation(this_trait,
     target__datasets compute all sample correlation
     """
     # xtodo fix trait_name currently returning single one
+    # pylint: disable-msg=too-many-locals
 
     this_trait_samples = this_trait["trait_sample_data"]
     corr_results = []
     processed_values = []
     for target_trait in target_dataset:
-        # trait_name = target_trait.get("trait_id")
+        trait_name = target_trait.get("trait_id")
         target_trait_data = target_trait["trait_sample_data"]
         # this_vals, target_vals = filter_shared_sample_keys(
         #     this_trait_samples, target_trait_data)
 
-        processed_values.append((corr_method, *filter_shared_sample_keys(
+        processed_values.append((trait_name, corr_method, *filter_shared_sample_keys(
             this_trait_samples, target_trait_data)))
     with multiprocessing.Pool() as pool:
         results = pool.starmap(compute_sample_r_correlation, processed_values)
 
         for sample_correlation in results:
             if sample_correlation is not None:
-                (corr_coeffient, p_value, num_overlap) = sample_correlation
+                (trait_name, corr_coeffient, p_value,
+                 num_overlap) = sample_correlation
 
                 corr_result = {
                     "corr_coeffient": corr_coeffient,
@@ -148,7 +150,7 @@ def compute_all_sample_correlation(this_trait,
                     "num_overlap": num_overlap
                 }
 
-                corr_results.append({"trait_name_key": corr_result})
+                corr_results.append({trait_name: corr_result})
 
     return sorted(
         corr_results,
@@ -158,7 +160,9 @@ def compute_all_sample_correlation(this_trait,
 def benchmark_compute_all_sample(this_trait,
                                  target_dataset,
                                  corr_method="pearson") ->List:
-    """Temp function to benchmark with compute_all_sample_r
+    """Temp function to benchmark with compute_all_sample_r\
+    alternative to compute_all_sample_r where we use \
+    multiprocessing
     """
 
     this_trait_samples = this_trait["trait_sample_data"]
@@ -166,18 +170,19 @@ def benchmark_compute_all_sample(this_trait,
     corr_results = []
 
     for target_trait in target_dataset:
-        trait_id = target_trait.get("trait_id")
+        trait_name = target_trait.get("trait_id")
         target_trait_data = target_trait["trait_sample_data"]
         this_vals, target_vals = filter_shared_sample_keys(
             this_trait_samples, target_trait_data)
 
         sample_correlation = compute_sample_r_correlation(
+            trait_name=trait_name,
             corr_method=corr_method,
             trait_vals=this_vals,
             target_samples_vals=target_vals)
 
         if sample_correlation is not None:
-            (corr_coeffient, p_value, num_overlap) = sample_correlation
+            (trait_name, corr_coeffient, p_value, num_overlap) = sample_correlation
 
         else:
             continue
@@ -188,7 +193,7 @@ def benchmark_compute_all_sample(this_trait,
             "num_overlap": num_overlap
         }
 
-        corr_results.append({trait_id: corr_result})
+        corr_results.append({trait_name: corr_result})
 
     return corr_results
 
diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py
index 9f3feab..8bb5cd1 100644
--- a/tests/unit/computations/test_correlation.py
+++ b/tests/unit/computations/test_correlation.py
@@ -120,21 +120,24 @@ class TestCorrelation(TestCase):
                                   [3.4, 6.2, 4, 1.1, 8, 1.1], 6)
         compute_corr.side_effect = [(0.7, 0.3), (-1.0, 0.9), (1, 0.21)]
 
-        pearson_results = compute_sample_r_correlation(corr_method="pearson",
+        pearson_results = compute_sample_r_correlation(trait_name="1412_at",
+                                                       corr_method="pearson",
                                                        trait_vals=primary_values,
                                                        target_samples_vals=target_values)
 
-        spearman_results = compute_sample_r_correlation(corr_method="spearman",
+        spearman_results = compute_sample_r_correlation(trait_name="1412_at",
+                                                        corr_method="spearman",
                                                         trait_vals=primary_values,
                                                         target_samples_vals=target_values)
 
-        bicor_results = compute_sample_r_correlation(corr_method="bicor",
+        bicor_results = compute_sample_r_correlation(trait_name="1412_at",
+                                                     corr_method="bicor",
                                                      trait_vals=primary_values,
                                                      target_samples_vals=target_values)
 
-        self.assertEqual(bicor_results, (1, 0.21, 6))
-        self.assertEqual(pearson_results, (0.7, 0.3, 6))
-        self.assertEqual(spearman_results, (-1.0, 0.9, 6))
+        self.assertEqual(bicor_results, ("1412_at", 1, 0.21, 6))
+        self.assertEqual(pearson_results, ("1412_at", 0.7, 0.3, 6))
+        self.assertEqual(spearman_results, ("1412_at", -1.0, 0.9, 6))
 
         self.assertIsInstance(
             pearson_results, tuple, "message")