Diffstat (limited to 'gn3/computations/correlations.py')
-rw-r--r-- | gn3/computations/correlations.py | 51
1 file changed, 22 insertions, 29 deletions
diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py
index 7fb67be..fb62b56 100644
--- a/gn3/computations/correlations.py
+++ b/gn3/computations/correlations.py
@@ -1,4 +1,6 @@
 """module contains code for correlations"""
+import multiprocessing
+
 from typing import List
 from typing import Tuple
 from typing import Optional
@@ -7,11 +9,6 @@ from typing import Callable
 
 import scipy.stats
 
 
-def compute_sum(rhs: int, lhs: int) -> int:
-    """Initial tests to compute sum of two numbers"""
-    return rhs + lhs
-
-
 def map_shared_keys_to_values(target_sample_keys: List, target_sample_vals: dict)-> List:
     """Function to construct target dataset data items given commoned shared\
     keys and trait samplelist values for example given keys >>>>>>>>>>\
@@ -73,14 +70,12 @@ pearson,spearman and biweight mid correlation return value is rho and p_value
     return (corr_coeffient, p_val)
 
 
-def compute_sample_r_correlation(
-        corr_method: str, trait_vals,
-        target_samples_vals) -> Optional[Tuple[float, float, int]]:
+def compute_sample_r_correlation(corr_method, trait_vals,
+                                 target_samples_vals) -> Optional[Tuple[float, float, int]]:
     """Given a primary trait values and target trait values
     calculate the correlation coeff and p value
 
     """
-
     (sanitized_traits_vals, sanitized_target_vals,
      num_overlap) = normalize_values(trait_vals, target_samples_vals)
 
@@ -127,35 +122,33 @@ def compute_all_sample_correlation(this_trait,
     """Given a trait data samplelist and\
     target__datasets compute all sample correlation
     """
+    # xtodo fix trait_name currently returning single one
     this_trait_samples = this_trait["trait_sample_data"]
-
     corr_results = []
-
+    processed_values = []
     for target_trait in target_dataset:
-        trait_id = target_trait.get("trait_id")
+        # trait_id = target_trait.get("trait_id")
         target_trait_data = target_trait["trait_sample_data"]
-        this_vals, target_vals = filter_shared_sample_keys(
-            this_trait_samples, target_trait_data)
-
-        sample_correlation = compute_sample_r_correlation(
-            corr_method=corr_method,
-            trait_vals=this_vals,
-            target_samples_vals=target_vals)
+        # this_vals, target_vals = filter_shared_sample_keys(
+        #     this_trait_samples, target_trait_data)
 
-        if sample_correlation is not None:
-            (corr_coeffient, p_value, num_overlap) = sample_correlation
+        processed_values.append((corr_method, *filter_shared_sample_keys(
+            this_trait_samples, target_trait_data)))
+    with multiprocessing.Pool() as pool:
+        results = pool.starmap(compute_sample_r_correlation, processed_values)
 
-        else:
-            continue
+    for sample_correlation in results:
+        if sample_correlation is not None:
+            (corr_coeffient, p_value, num_overlap) = sample_correlation
 
-        corr_result = {
-            "corr_coeffient": corr_coeffient,
-            "p_value": p_value,
-            "num_overlap": num_overlap
-        }
+            corr_result = {
+                "corr_coeffient": corr_coeffient,
+                "p_value": p_value,
+                "num_overlap": num_overlap
+            }
 
-        corr_results.append({trait_id: corr_result})
+            corr_results.append({"trait_name_key": corr_result})
 
     return corr_results
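The substantive change in this commit is the move from correlating each target trait sequentially inside the loop to collecting one argument tuple per target and handing the batch to multiprocessing.Pool.starmap. The sketch below shows that pattern in isolation, under simplifying assumptions: the names sample_r_correlation, compute_all and the toy data are hypothetical stand-ins, not the gn3 functions, and the worker here calls scipy directly instead of going through normalize_values.

# Minimal sketch of the Pool.starmap pattern adopted in this diff.
# Names and data are illustrative only, not the gn3 API.
import multiprocessing
from typing import List, Optional, Tuple

import scipy.stats


def sample_r_correlation(corr_method: str, trait_vals: List[float],
                         target_vals: List[float]) -> Optional[Tuple[float, float, int]]:
    """Correlate one primary/target pair; return (rho, p_value, num_overlap)."""
    if len(trait_vals) < 3:
        return None  # too few shared samples to correlate
    if corr_method == "spearman":
        rho, p_val = scipy.stats.spearmanr(trait_vals, target_vals)
    else:
        rho, p_val = scipy.stats.pearsonr(trait_vals, target_vals)
    return (rho, p_val, len(trait_vals))


def compute_all(this_vals: List[float], target_traits: List[List[float]],
                corr_method: str = "pearson") -> List[dict]:
    # Build one argument tuple per target trait, then fan the work out to a
    # pool of worker processes; starmap unpacks each tuple into positional
    # arguments and returns the results in submission order.
    processed_values = [(corr_method, this_vals, target_vals)
                        for target_vals in target_traits]
    with multiprocessing.Pool() as pool:
        results = pool.starmap(sample_r_correlation, processed_values)

    # Collect results, skipping pairs that could not be correlated.
    corr_results = []
    for result in results:
        if result is not None:
            (rho, p_val, num_overlap) = result
            corr_results.append({"corr_coefficient": rho,
                                 "p_value": p_val,
                                 "num_overlap": num_overlap})
    return corr_results


if __name__ == "__main__":  # guard matters when workers are spawned
    primary = [1.2, 3.4, 5.6, 7.8, 9.0]
    targets = [[2.1, 4.3, 6.5, 8.7, 10.9], [9.0, 7.8, 5.6, 3.4, 1.2]]
    print(compute_all(primary, targets, corr_method="spearman"))

Because starmap preserves input order, collecting results in a second loop keeps each result paired with its target trait even though the submission loop no longer does the correlation itself. In the committed code the shared-sample filtering (filter_shared_sample_keys) still runs in the parent process while building processed_values, and normalize_values runs inside compute_sample_r_correlation in the workers.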