From e5d2ce8f29e43900977b967ec8cac715f544a2f0 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 25 Mar 2021 02:25:45 +0300 Subject: add code for calling gn3 correlation endpoint --- wqflask/wqflask/correlation/correlation_gn3_api.py | 131 ++++++++++++---- wqflask/wqflask/correlation/show_corr_results.py | 174 +++++++++------------ wqflask/wqflask/views.py | 5 +- 3 files changed, 177 insertions(+), 133 deletions(-) (limited to 'wqflask') diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 4cf6533c..7e269e41 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -1,20 +1,17 @@ """module that calls the gn3 api's to do the correlation """ -from base import data_set -from base.trait import create_trait -from base.trait import retrieve_sample_data +import json +import requests +from wqflask.wqflask.correlation import correlation_functions +from wqflask.base import data_set +from wqflask.base.trait import create_trait +from wqflask.base.trait import retrieve_sample_data +GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation" - - - -def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"): - import requests - from wqflask.correlation.correlation_gn3_api import compute_correlation - - cor_results = compute_correlation(start_vars) - +def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"): + """integration for integrating sample_r api correlation""" data = { "target_dataset": target_dataset, "target_samplelist": target_samplelist, @@ -29,33 +26,60 @@ def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, m data = results.json() - print(data) - return data -def process_samples(start_vars,sample_names,excluded_samples=None): +def get_tissue_correlation_input(this_trait, trait_symbol_dict): + """Gets tissue expression values for the primary trait and target tissues values""" + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) + + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] + + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) + + target_tissue_data = [] + for trait, symbol in list(trait_symbol_dict.items()): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] + + this_trait_data = {"trait_id": trait, + "tissue_values": this_trait_tissue_values} + + target_tissue_data.append(this_trait_data) + + primary_tissue_data = { + "this_id": "TT", + "tissue_values": primary_trait_tissue_values + + } + + return (primary_tissue_data, target_tissue_data) + + return None + + +def process_samples(start_vars, sample_names, excluded_samples=None): + """process samples method""" sample_data = {} if not excluded_samples: excluded_samples = () - sample_vals_dict = json.loads(start_vars["sample_vals"]) + sample_vals_dict = json.loads(start_vars["sample_vals"]) for sample in sample_names: if sample not in excluded_samples: - val = sample_val_dict[sample] + val = sample_vals_dict[sample] if not val.strip().lower() == "x": - sample_data[str(sample)]=float(value) + sample_data[str(sample)] = float(val) return sample_data -def create_fetch_dataset_data(dataset_name): - this_dataset = data_set.create_dataset(dataset_name=dataset_name) - - this_dataset.get_trait_data() - - def create_target_this_trait(start_vars): """this function prefetch required data for correlation""" @@ -66,12 +90,61 @@ def create_target_this_trait(start_vars): this_trait = create_trait(dataset=this_dataset, name=start_vars['trait_id']) + sample_data = process_samples(start_vars, this_dataset.group.samplelist) + # target_dataset.get_trait_data(list(self.sample_data.keys())) + this_trait = retrieve_sample_data(this_trait, this_dataset) - target_dataset.get_trait_data() + target_dataset.get_trait_data(list(sample_data.keys())) + + return (this_dataset, this_trait, target_dataset, sample_data) + + +def compute_correlation(start_vars, method="pearson"): + """compute correlation for to call gn3 api""" + + corr_type = start_vars['corr_type'] + + (this_dataset, this_trait, target_dataset, + sample_data) = create_target_this_trait(start_vars) + + # cor_results = compute_correlation(start_vars) + + method = start_vars['corr_sample_method'] + + corr_input_data = {} - return (this_dataset,this_trait,target_dataset) -def compute_correlation(start_vars): + if corr_type == "sample": + corr_input_data = { + "target_dataset": target_dataset.trait_data, + "target_samplelist": target_dataset.samplelist, + "trait_data": { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + } + + requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" + + elif corr_type == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + primary_tissue_data, target_tissue_data = get_tissue_correlation_input( + this_trait, trait_symbol_dict) + + corr_input_data = { + "primary_tissue": primary_tissue_data, + "target_tissues": target_tissue_data + } - this_dataset, this_trait, target_dataset = create_target_this_trait( - start_vars=start_vars) + requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" + + else: + pass + # lit correlation/literature + # can fetch values in gn3 not set up in gn3 + + corr_results = requests.post(requests_url, json=corr_input_data) + + data = corr_results.json() + + return data diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index a817a4a4..50b3ba26 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -57,32 +57,6 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 - -def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"): - import requests - from wqflask.correlation.correlation_gn3_api import compute_correlation - - # cor_results = compute_correlation(start_vars) - - data = { - "target_dataset": target_dataset, - "target_samplelist": target_samplelist, - "trait_data": { - "trait_sample_data": trait_data, - "trait_id": "HC_Q" - } - } - requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}" - - results = requests.post(requests_url, json=data) - - data = results.json() - - print(data) - - return data - - class CorrelationResults(object): def __init__(self, start_vars): # get trait list from db (database name) @@ -197,81 +171,78 @@ class CorrelationResults(object): trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": - - compute_sample_r(start_vars, - self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist) - # for trait, values in list(self.target_dataset.trait_data.items()): - # self.get_sample_r_and_p_values(trait, values) - - # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), - # key=lambda t: -abs(t[1][0]))) - - # # ZS: Convert min/max chromosome to an int for the location range option - # range_chr_as_int = None - # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): - # if 'loc_chr' in start_vars: - # if chr_info.name == self.location_chr: - # range_chr_as_int = order_id - - # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - # trait_object = create_trait( - # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) - # if not trait_object: - # continue - - # chr_as_int = 0 - # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): - # if self.location_type == "highest_lod": - # if chr_info.name == trait_object.locus_chr: - # chr_as_int = order_id - # else: - # if chr_info.name == trait_object.chr: - # chr_as_int = order_id - - # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and - # float(self.correlation_data[trait][0]) <= self.p_range_upper): - - # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): - # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): - # continue - - # if range_chr_as_int != None and (chr_as_int != range_chr_as_int): - # continue - # if self.location_type == "highest_lod": - # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)): - # continue - # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)): - # continue - # else: - # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)): - # continue - # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)): - # continue - - # (trait_object.sample_r, - # trait_object.sample_p, - # trait_object.num_overlap) = self.correlation_data[trait] - - # # Set some sane defaults - # trait_object.tissue_corr = 0 - # trait_object.tissue_pvalue = 0 - # trait_object.lit_corr = 0 - # if self.corr_type == "tissue" and tissue_corr_data != None: - # trait_object.tissue_corr = tissue_corr_data[trait][1] - # trait_object.tissue_pvalue = tissue_corr_data[trait][2] - # elif self.corr_type == "lit": - # trait_object.lit_corr = lit_corr_data[trait][1] - - # self.correlation_results.append(trait_object) - - # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": - # self.do_lit_correlation_for_trait_list() - - # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": - # self.do_tissue_correlation_for_trait_list() - - # self.json_results = generate_corr_json( - # self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + for trait, values in list(self.target_dataset.trait_data.items()): + self.get_sample_r_and_p_values(trait, values) + + self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), + key=lambda t: -abs(t[1][0]))) + + # ZS: Convert min/max chromosome to an int for the location range option + range_chr_as_int = None + for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): + if 'loc_chr' in start_vars: + if chr_info.name == self.location_chr: + range_chr_as_int = order_id + + for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): + trait_object = create_trait( + dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + if not trait_object: + continue + + chr_as_int = 0 + for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): + if self.location_type == "highest_lod": + if chr_info.name == trait_object.locus_chr: + chr_as_int = order_id + else: + if chr_info.name == trait_object.chr: + chr_as_int = order_id + + if (float(self.correlation_data[trait][0]) >= self.p_range_lower and + float(self.correlation_data[trait][0]) <= self.p_range_upper): + + if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): + if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): + continue + + if range_chr_as_int != None and (chr_as_int != range_chr_as_int): + continue + if self.location_type == "highest_lod": + if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)): + continue + if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)): + continue + else: + if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)): + continue + if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)): + continue + + (trait_object.sample_r, + trait_object.sample_p, + trait_object.num_overlap) = self.correlation_data[trait] + + # Set some sane defaults + trait_object.tissue_corr = 0 + trait_object.tissue_pvalue = 0 + trait_object.lit_corr = 0 + if self.corr_type == "tissue" and tissue_corr_data != None: + trait_object.tissue_corr = tissue_corr_data[trait][1] + trait_object.tissue_pvalue = tissue_corr_data[trait][2] + elif self.corr_type == "lit": + trait_object.lit_corr = lit_corr_data[trait][1] + + self.correlation_results.append(trait_object) + + if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": + self.do_lit_correlation_for_trait_list() + + if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": + self.do_tissue_correlation_for_trait_list() + + self.json_results = generate_corr_json( + self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -465,7 +436,6 @@ class CorrelationResults(object): return mouse_gene_id - def get_sample_r_and_p_values(self, trait, target_samples): """Calculates the sample r (or rho) and p-value diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 2c0ba586..6ca9b23f 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -46,6 +46,7 @@ from wqflask.marker_regression import run_mapping from wqflask.marker_regression import display_mapping_results from wqflask.network_graph import network_graph from wqflask.correlation import show_corr_results +from wqflask.correlation.correlation_gn3_api import compute_correlation from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot from wqflask.wgcna import wgcna_analysis @@ -880,8 +881,8 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - template_vars = show_corr_results.CorrelationResults(request.form) - return render_template("correlation_page.html", **template_vars.__dict__) + correlation_results = compute_correlation(request.form) + return render_template("demo_correlation_page.html",correlation_results=correlation_results) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3