aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexander Kabui 2021-04-18 23:52:04 +0300
committer Alexander Kabui 2021-04-18 23:52:04 +0300
commit d266ca9d59093c253ce7b56f9a14119869eb0003 (patch)
tree af3fde7e4ad9ec7b34c8ec6811940e97e8d60d19
parent ba1ea53443b8085700df2941e68421bcc8206c8b (diff)
download genenetwork3-d266ca9d59093c253ce7b56f9a14119869eb0003.tar.gz
refactor:return trait_name in corr_results
-rw-r--r-- gn3/computations/correlations.py 27
-rw-r--r-- tests/unit/computations/test_correlation.py 15
2 files changed, 25 insertions, 17 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 1e95800..8410995 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -70,8 +70,8 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
return (corr_coeffient, p_val)
-def compute_sample_r_correlation(corr_method, trait_vals,
- target_samples_vals) -> Optional[Tuple[float, float, int]]:
+def compute_sample_r_correlation(trait_name, corr_method, trait_vals,
+ target_samples_vals) -> Optional[Tuple[str, float, float, int]]:
"""Given a primary trait values and target trait values calculate the
correlation coeff and p value
@@ -89,7 +89,7 @@ def compute_sample_r_correlation(corr_method, trait_vals,
# xtodo check if corr_coefficient is None
# should use numpy.isnan; scipy.isnan is deprecated
if corr_coeffient is not None:
- return (corr_coeffient, p_value, num_overlap)
+ return (trait_name, corr_coeffient, p_value, num_overlap)
return None
@@ -123,24 +123,26 @@ def compute_all_sample_correlation(this_trait,
target__datasets compute all sample correlation
"""
# xtodo fix trait_name currently returning single one
+ # pylint: disable-msg=too-many-locals
this_trait_samples = this_trait["trait_sample_data"]
corr_results = []
processed_values = []
for target_trait in target_dataset:
- # trait_name = target_trait.get("trait_id")
+ trait_name = target_trait.get("trait_id")
target_trait_data = target_trait["trait_sample_data"]
# this_vals, target_vals = filter_shared_sample_keys(
# this_trait_samples, target_trait_data)
- processed_values.append((corr_method, *filter_shared_sample_keys(
+ processed_values.append((trait_name, corr_method, *filter_shared_sample_keys(
this_trait_samples, target_trait_data)))
with multiprocessing.Pool() as pool:
results = pool.starmap(compute_sample_r_correlation, processed_values)
for sample_correlation in results:
if sample_correlation is not None:
- (corr_coeffient, p_value, num_overlap) = sample_correlation
+ (trait_name, corr_coeffient, p_value,
+ num_overlap) = sample_correlation
corr_result = {
"corr_coeffient": corr_coeffient,
@@ -148,7 +150,7 @@ def compute_all_sample_correlation(this_trait,
"num_overlap": num_overlap
}
- corr_results.append({"trait_name_key": corr_result})
+ corr_results.append({trait_name: corr_result})
return sorted(
corr_results,
@@ -158,7 +160,9 @@ def compute_all_sample_correlation(this_trait,
def benchmark_compute_all_sample(this_trait,
target_dataset,
corr_method="pearson") ->List:
- """Temp function to benchmark with compute_all_sample_r
+ """Temp function to benchmark with compute_all_sample_r\
+ alternative to compute_all_sample_r where we use \
+ multiprocessing
"""
this_trait_samples = this_trait["trait_sample_data"]
@@ -166,18 +170,19 @@ def benchmark_compute_all_sample(this_trait,
corr_results = []
for target_trait in target_dataset:
- trait_id = target_trait.get("trait_id")
+ trait_name = target_trait.get("trait_id")
target_trait_data = target_trait["trait_sample_data"]
this_vals, target_vals = filter_shared_sample_keys(
this_trait_samples, target_trait_data)
sample_correlation = compute_sample_r_correlation(
+ trait_name=trait_name,
corr_method=corr_method,
trait_vals=this_vals,
target_samples_vals=target_vals)
if sample_correlation is not None:
- (corr_coeffient, p_value, num_overlap) = sample_correlation
+ (trait_name, corr_coeffient, p_value, num_overlap) = sample_correlation
else:
continue
@@ -188,7 +193,7 @@ def benchmark_compute_all_sample(this_trait,
"num_overlap": num_overlap
}
- corr_results.append({trait_id: corr_result})
+ corr_results.append({trait_name: corr_result})
return corr_results
diff --git a/tests/unit/computations/test_correlation.py b/tests/unit/computations/test_correlation.py
index 9f3feab..8bb5cd1 100644
--- a/tests/unit/computations/test_correlation.py
+++ b/tests/unit/computations/test_correlation.py
@@ -120,21 +120,24 @@ class TestCorrelation(TestCase):
[3.4, 6.2, 4, 1.1, 8, 1.1], 6)
compute_corr.side_effect = [(0.7, 0.3), (-1.0, 0.9), (1, 0.21)]
- pearson_results = compute_sample_r_correlation(corr_method="pearson",
+ pearson_results = compute_sample_r_correlation(trait_name="1412_at",
+ corr_method="pearson",
trait_vals=primary_values,
target_samples_vals=target_values)
- spearman_results = compute_sample_r_correlation(corr_method="spearman",
+ spearman_results = compute_sample_r_correlation(trait_name="1412_at",
+ corr_method="spearman",
trait_vals=primary_values,
target_samples_vals=target_values)
- bicor_results = compute_sample_r_correlation(corr_method="bicor",
+ bicor_results = compute_sample_r_correlation(trait_name="1412_at",
+ corr_method="bicor",
trait_vals=primary_values,
target_samples_vals=target_values)
- self.assertEqual(bicor_results, (1, 0.21, 6))
- self.assertEqual(pearson_results, (0.7, 0.3, 6))
- self.assertEqual(spearman_results, (-1.0, 0.9, 6))
+ self.assertEqual(bicor_results, ("1412_at", 1, 0.21, 6))
+ self.assertEqual(pearson_results, ("1412_at", 0.7, 0.3, 6))
+ self.assertEqual(spearman_results, ("1412_at", -1.0, 0.9, 6))
self.assertIsInstance(
pearson_results, tuple, "message")