about | summary | refs | log | tree | commit | diff
path: root/gn3/computations
diff options
context:
space:
mode:
author Alexander Kabui 2021-04-15 02:17:30 +0300
committer Alexander Kabui 2021-04-15 02:17:30 +0300
commit f3f68f8eb92c7ec9c42bc20bc8e94c435cc745e2 (patch)
tree b9c8cef828e7dc1f19a85da3807ea0ad77bd690a /gn3/computations
parent ff80b5228e741c24d66d1d1c13702a34aac77a78 (diff)
download genenetwork3-f3f68f8eb92c7ec9c42bc20bc8e94c435cc745e2.tar.gz
optimization for sample correlation
Diffstat (limited to 'gn3/computations')
-rw-r--r-- gn3/computations/correlations.py 51
1 files changed, 22 insertions, 29 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 7fb67be..fb62b56 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -1,4 +1,6 @@
"""module contains code for correlations"""
+import multiprocessing
+
from typing import List
from typing import Tuple
from typing import Optional
@@ -7,11 +9,6 @@ from typing import Callable
import scipy.stats
-def compute_sum(rhs: int, lhs: int) -> int:
- """Initial tests to compute sum of two numbers"""
- return rhs + lhs
-
-
def map_shared_keys_to_values(target_sample_keys: List, target_sample_vals: dict)-> List:
"""Function to construct target dataset data items given commoned shared\
keys and trait samplelist values for example given keys >>>>>>>>>>\
@@ -73,14 +70,12 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
return (corr_coeffient, p_val)
-def compute_sample_r_correlation(
- corr_method: str, trait_vals,
- target_samples_vals) -> Optional[Tuple[float, float, int]]:
+def compute_sample_r_correlation(corr_method, trait_vals,
+ target_samples_vals) -> Optional[Tuple[float, float, int]]:
"""Given a primary trait values and target trait values calculate the
correlation coeff and p value
"""
-
(sanitized_traits_vals, sanitized_target_vals,
num_overlap) = normalize_values(trait_vals, target_samples_vals)
@@ -127,35 +122,33 @@ def compute_all_sample_correlation(this_trait,
"""Given a trait data samplelist and\
target__datasets compute all sample correlation
"""
+ # xtodo fix trait_name currently returning single one
this_trait_samples = this_trait["trait_sample_data"]
-
corr_results = []
-
+ processed_values = []
for target_trait in target_dataset:
- trait_id = target_trait.get("trait_id")
+ # trait_id = target_trait.get("trait_id")
target_trait_data = target_trait["trait_sample_data"]
- this_vals, target_vals = filter_shared_sample_keys(
- this_trait_samples, target_trait_data)
-
- sample_correlation = compute_sample_r_correlation(
- corr_method=corr_method,
- trait_vals=this_vals,
- target_samples_vals=target_vals)
+ # this_vals, target_vals = filter_shared_sample_keys(
+ # this_trait_samples, target_trait_data)
- if sample_correlation is not None:
- (corr_coeffient, p_value, num_overlap) = sample_correlation
+ processed_values.append((corr_method, *filter_shared_sample_keys(
+ this_trait_samples, target_trait_data)))
+ with multiprocessing.Pool() as pool:
+ results = pool.starmap(compute_sample_r_correlation, processed_values)
- else:
- continue
+ for sample_correlation in results:
+ if sample_correlation is not None:
+ (corr_coeffient, p_value, num_overlap) = sample_correlation
- corr_result = {
- "corr_coeffient": corr_coeffient,
- "p_value": p_value,
- "num_overlap": num_overlap
- }
+ corr_result = {
+ "corr_coeffient": corr_coeffient,
+ "p_value": p_value,
+ "num_overlap": num_overlap
+ }
- corr_results.append({trait_id: corr_result})
+ corr_results.append({"trait_name_key": corr_result})
return corr_results