diff options
Diffstat (limited to 'gn3')
-rw-r--r-- | gn3/api/correlation.py | 5 | ||||
-rw-r--r-- | gn3/computations/correlations.py | 51 |
2 files changed, 26 insertions, 30 deletions
diff --git a/gn3/api/correlation.py b/gn3/api/correlation.py index f28e1f5..7be8e30 100644 --- a/gn3/api/correlation.py +++ b/gn3/api/correlation.py @@ -16,6 +16,8 @@ correlation = Blueprint("correlation", __name__) def compute_sample_integration(corr_method="pearson"): """temporary api to help integrate genenetwork2 to genenetwork3 """ + # for debug + print("Calling this endpoint") correlation_input = request.get_json() target_samplelist = correlation_input.get("target_samplelist") @@ -23,7 +25,6 @@ def compute_sample_integration(corr_method="pearson"): this_trait_data = correlation_input.get("trait_data") results = map_shared_keys_to_values(target_samplelist, target_data_values) - correlation_results = compute_all_sample_correlation(corr_method=corr_method, this_trait=this_trait_data, target_dataset=results) @@ -75,6 +76,8 @@ def compute_lit_corr(species=None, gene_id=None): @correlation.route("/tissue_corr/<string:corr_method>", methods=["POST"]) def compute_tissue_corr(corr_method="pearson"): """Api endpoint fr doing tissue correlation""" + # for debug + print("The request has been received") tissue_input_data = request.get_json() primary_tissue_dict = tissue_input_data["primary_tissue"] target_tissues_dict = tissue_input_data["target_tissues_dict"] diff --git a/gn3/computations/correlations.py b/gn3/computations/correlations.py index 7fb67be..fb62b56 100644 --- a/gn3/computations/correlations.py +++ b/gn3/computations/correlations.py @@ -1,4 +1,6 @@ """module contains code for correlations""" +import multiprocessing + from typing import List from typing import Tuple from typing import Optional @@ -7,11 +9,6 @@ from typing import Callable import scipy.stats -def compute_sum(rhs: int, lhs: int) -> int: - """Initial tests to compute sum of two numbers""" - return rhs + lhs - - def map_shared_keys_to_values(target_sample_keys: List, target_sample_vals: dict)-> List: """Function to construct target dataset data items given commoned shared\ keys and trait samplelist values for example given keys >>>>>>>>>>\ @@ -73,14 +70,12 @@ pearson,spearman and biweight mid correlation return value is rho and p_value return (corr_coeffient, p_val) -def compute_sample_r_correlation( - corr_method: str, trait_vals, - target_samples_vals) -> Optional[Tuple[float, float, int]]: +def compute_sample_r_correlation(corr_method, trait_vals, + target_samples_vals) -> Optional[Tuple[float, float, int]]: """Given a primary trait values and target trait values calculate the correlation coeff and p value """ - (sanitized_traits_vals, sanitized_target_vals, num_overlap) = normalize_values(trait_vals, target_samples_vals) @@ -127,35 +122,33 @@ def compute_all_sample_correlation(this_trait, """Given a trait data samplelist and\ target__datasets compute all sample correlation """ + # xtodo fix trait_name currently returning single one this_trait_samples = this_trait["trait_sample_data"] - corr_results = [] - + processed_values = [] for target_trait in target_dataset: - trait_id = target_trait.get("trait_id") + # trait_id = target_trait.get("trait_id") target_trait_data = target_trait["trait_sample_data"] - this_vals, target_vals = filter_shared_sample_keys( - this_trait_samples, target_trait_data) - - sample_correlation = compute_sample_r_correlation( - corr_method=corr_method, - trait_vals=this_vals, - target_samples_vals=target_vals) + # this_vals, target_vals = filter_shared_sample_keys( + # this_trait_samples, target_trait_data) - if sample_correlation is not None: - (corr_coeffient, p_value, num_overlap) = sample_correlation + processed_values.append((corr_method, *filter_shared_sample_keys( + this_trait_samples, target_trait_data))) + with multiprocessing.Pool() as pool: + results = pool.starmap(compute_sample_r_correlation, processed_values) - else: - continue + for sample_correlation in results: + if sample_correlation is not None: + (corr_coeffient, p_value, num_overlap) = sample_correlation - corr_result = { - "corr_coeffient": corr_coeffient, - "p_value": p_value, - "num_overlap": num_overlap - } + corr_result = { + "corr_coeffient": corr_coeffient, + "p_value": p_value, + "num_overlap": num_overlap + } - corr_results.append({trait_id: corr_result}) + corr_results.append({"trait_name_key": corr_result}) return corr_results |