From be9c4a39500d7978b4cae7536a5f96c3818d211e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 24 Mar 2021 09:41:47 +0300 Subject: initial commit for gn3-correlation api integration --- .../wqflask/correlation/test_correlation_gn3.py | 14 ++++ wqflask/wqflask/correlation/correlation_gn3_api.py | 77 ++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py create mode 100644 wqflask/wqflask/correlation/correlation_gn3_api.py diff --git a/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py new file mode 100644 index 00000000..e1bd6d86 --- /dev/null +++ b/wqflask/tests/unit/wqflask/correlation/test_correlation_gn3.py @@ -0,0 +1,14 @@ +"""this module contains tests for code used in integrating to gn3 api""" +from unittest import TestCase +from base.data_set import create_dataset + +class TestCorrelation(TestCase): + + def test_create_dataset(self): + """test for creating datasets""" + + pass + def test_fetch_dataset_info(self): + """test for fetching dataset info data""" + + pass diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py new file mode 100644 index 00000000..4cf6533c --- /dev/null +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -0,0 +1,77 @@ +"""module that calls the gn3 api's to do the correlation """ +from base import data_set +from base.trait import create_trait +from base.trait import retrieve_sample_data + + + + + + + +def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"): + import requests + from wqflask.correlation.correlation_gn3_api import compute_correlation + + cor_results = compute_correlation(start_vars) + + data = { + "target_dataset": target_dataset, + "target_samplelist": target_samplelist, + "trait_data": { + "trait_sample_data": trait_data, + "trait_id": "HC_Q" + } + } + requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}" + + results = requests.post(requests_url, json=data) + + data = results.json() + + print(data) + + return data + + +def process_samples(start_vars,sample_names,excluded_samples=None): + sample_data = {} + if not excluded_samples: + excluded_samples = () + + sample_vals_dict = json.loads(start_vars["sample_vals"]) + + for sample in sample_names: + if sample not in excluded_samples: + val = sample_val_dict[sample] + if not val.strip().lower() == "x": + sample_data[str(sample)]=float(value) + + return sample_data + + +def create_fetch_dataset_data(dataset_name): + this_dataset = data_set.create_dataset(dataset_name=dataset_name) + + this_dataset.get_trait_data() + + +def create_target_this_trait(start_vars): + """this function prefetch required data for correlation""" + + this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['corr_dataset']) + + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) + + this_trait = retrieve_sample_data(this_trait, this_dataset) + + target_dataset.get_trait_data() + + return (this_dataset,this_trait,target_dataset) +def compute_correlation(start_vars): + + this_dataset, this_trait, target_dataset = create_target_this_trait( + start_vars=start_vars) -- cgit v1.2.3 From d913848572dd284ae7656e72dad199e99907871a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 24 Mar 2021 12:59:49 +0300 Subject: initial commit for integrating to gn3 api --- wqflask/wqflask/correlation/show_corr_results.py | 428 +++++++++++++---------- 1 file changed, 242 insertions(+), 186 deletions(-) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index fb4dc4f4..a817a4a4 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -1,4 +1,4 @@ -## Copyright (C) University of Tennessee Health Science Center, Memphis, TN. +# Copyright (C) University of Tennessee Health Science Center, Memphis, TN. # # This program is free software: you can redistribute it and/or modify it # under the terms of the GNU Affero General Public License @@ -58,6 +58,31 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 +def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"): + import requests + from wqflask.correlation.correlation_gn3_api import compute_correlation + + # cor_results = compute_correlation(start_vars) + + data = { + "target_dataset": target_dataset, + "target_samplelist": target_samplelist, + "trait_data": { + "trait_sample_data": trait_data, + "trait_id": "HC_Q" + } + } + requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}" + + results = requests.post(requests_url, json=data) + + data = results.json() + + print(data) + + return data + + class CorrelationResults(object): def __init__(self, start_vars): # get trait list from db (database name) @@ -78,11 +103,12 @@ class CorrelationResults(object): with Bench("Doing correlations"): if start_vars['dataset'] == "Temp": - self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group']) + self.dataset = data_set.create_dataset( + dataset_name="Temp", dataset_type="Temp", group_name=start_vars['group']) self.trait_id = start_vars['trait_id'] self.this_trait = create_trait(dataset=self.dataset, - name=self.trait_id, - cellid=None) + name=self.trait_id, + cellid=None) else: helper_functions.get_species_dataset_trait(self, start_vars) @@ -97,7 +123,7 @@ class CorrelationResults(object): if ('loc_chr' in start_vars and 'min_loc_mb' in start_vars and - 'max_loc_mb' in start_vars): + 'max_loc_mb' in start_vars): self.location_type = get_string(start_vars, 'location_type') self.location_chr = get_string(start_vars, 'loc_chr') @@ -109,8 +135,8 @@ class CorrelationResults(object): self.get_formatted_corr_type() self.return_number = int(start_vars['corr_return_results']) - #The two if statements below append samples to the sample list based upon whether the user - #rselected Primary Samples Only, Other Samples Only, or All Samples + # The two if statements below append samples to the sample list based upon whether the user + # rselected Primary Samples Only, Other Samples Only, or All Samples primary_samples = self.dataset.group.samplelist if self.dataset.group.parlist != None: @@ -118,23 +144,26 @@ class CorrelationResults(object): if self.dataset.group.f1list != None: primary_samples += self.dataset.group.f1list - #If either BXD/whatever Only or All Samples, append all of that group's samplelist + # If either BXD/whatever Only or All Samples, append all of that group's samplelist if corr_samples_group != 'samples_other': self.process_samples(start_vars, primary_samples) - #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and - #exclude the primary samples (because they would have been added in the previous - #if statement if the user selected All Samples) + # If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and + # exclude the primary samples (because they would have been added in the previous + # if statement if the user selected All Samples) if corr_samples_group != 'samples_primary': if corr_samples_group == 'samples_other': primary_samples = [x for x in primary_samples if x not in ( - self.dataset.group.parlist + self.dataset.group.f1list)] - self.process_samples(start_vars, list(self.this_trait.data.keys()), primary_samples) + self.dataset.group.parlist + self.dataset.group.f1list)] + self.process_samples(start_vars, list( + self.this_trait.data.keys()), primary_samples) - self.target_dataset = data_set.create_dataset(start_vars['corr_dataset']) + self.target_dataset = data_set.create_dataset( + start_vars['corr_dataset']) self.target_dataset.get_trait_data(list(self.sample_data.keys())) - self.header_fields = get_header_fields(self.target_dataset.type, self.corr_method) + self.header_fields = get_header_fields( + self.target_dataset.type, self.corr_method) if self.target_dataset.type == "ProbeSet": self.filter_cols = [7, 6] @@ -153,7 +182,8 @@ class CorrelationResults(object): tissue_corr_data = self.do_tissue_correlation_for_all_traits() if tissue_corr_data != None: for trait in list(tissue_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) else: for trait, values in list(self.target_dataset.trait_data.items()): self.get_sample_r_and_p_values(trait, values) @@ -163,80 +193,85 @@ class CorrelationResults(object): lit_corr_data = self.do_lit_correlation_for_all_traits() for trait in list(lit_corr_data.keys())[:self.return_number]: - self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait]) + self.get_sample_r_and_p_values( + trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": - for trait, values in list(self.target_dataset.trait_data.items()): - self.get_sample_r_and_p_values(trait, values) - - self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), - key=lambda t: -abs(t[1][0]))) - - - #ZS: Convert min/max chromosome to an int for the location range option - range_chr_as_int = None - for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): - if 'loc_chr' in start_vars: - if chr_info.name == self.location_chr: - range_chr_as_int = order_id - - for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - trait_object = create_trait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) - if not trait_object: - continue - - chr_as_int = 0 - for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): - if self.location_type == "highest_lod": - if chr_info.name == trait_object.locus_chr: - chr_as_int = order_id - else: - if chr_info.name == trait_object.chr: - chr_as_int = order_id - - if (float(self.correlation_data[trait][0]) >= self.p_range_lower and - float(self.correlation_data[trait][0]) <= self.p_range_upper): - - if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): - if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): - continue - - if range_chr_as_int != None and (chr_as_int != range_chr_as_int): - continue - if self.location_type == "highest_lod": - if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)): - continue - if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)): - continue - else: - if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)): - continue - if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)): - continue - - (trait_object.sample_r, - trait_object.sample_p, - trait_object.num_overlap) = self.correlation_data[trait] - - # Set some sane defaults - trait_object.tissue_corr = 0 - trait_object.tissue_pvalue = 0 - trait_object.lit_corr = 0 - if self.corr_type == "tissue" and tissue_corr_data != None: - trait_object.tissue_corr = tissue_corr_data[trait][1] - trait_object.tissue_pvalue = tissue_corr_data[trait][2] - elif self.corr_type == "lit": - trait_object.lit_corr = lit_corr_data[trait][1] - - self.correlation_results.append(trait_object) - - if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": - self.do_lit_correlation_for_trait_list() - - if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": - self.do_tissue_correlation_for_trait_list() - - self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + + compute_sample_r(start_vars, + self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist) + # for trait, values in list(self.target_dataset.trait_data.items()): + # self.get_sample_r_and_p_values(trait, values) + + # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), + # key=lambda t: -abs(t[1][0]))) + + # # ZS: Convert min/max chromosome to an int for the location range option + # range_chr_as_int = None + # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): + # if 'loc_chr' in start_vars: + # if chr_info.name == self.location_chr: + # range_chr_as_int = order_id + + # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): + # trait_object = create_trait( + # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + # if not trait_object: + # continue + + # chr_as_int = 0 + # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): + # if self.location_type == "highest_lod": + # if chr_info.name == trait_object.locus_chr: + # chr_as_int = order_id + # else: + # if chr_info.name == trait_object.chr: + # chr_as_int = order_id + + # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and + # float(self.correlation_data[trait][0]) <= self.p_range_upper): + + # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): + # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): + # continue + + # if range_chr_as_int != None and (chr_as_int != range_chr_as_int): + # continue + # if self.location_type == "highest_lod": + # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)): + # continue + # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)): + # continue + # else: + # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)): + # continue + # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)): + # continue + + # (trait_object.sample_r, + # trait_object.sample_p, + # trait_object.num_overlap) = self.correlation_data[trait] + + # # Set some sane defaults + # trait_object.tissue_corr = 0 + # trait_object.tissue_pvalue = 0 + # trait_object.lit_corr = 0 + # if self.corr_type == "tissue" and tissue_corr_data != None: + # trait_object.tissue_corr = tissue_corr_data[trait][1] + # trait_object.tissue_pvalue = tissue_corr_data[trait][2] + # elif self.corr_type == "lit": + # trait_object.lit_corr = lit_corr_data[trait][1] + + # self.correlation_results.append(trait_object) + + # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": + # self.do_lit_correlation_for_trait_list() + + # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": + # self.do_tissue_correlation_for_trait_list() + + # self.json_results = generate_corr_json( + # self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -259,39 +294,43 @@ class CorrelationResults(object): def do_tissue_correlation_for_trait_list(self, tissue_dataset_id=1): """Given a list of correlation results (self.correlation_results), gets the tissue correlation value for each""" - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] - gene_symbol_list = [trait.symbol for trait in self.correlation_results if trait.symbol] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] + gene_symbol_list = [ + trait.symbol for trait in self.correlation_results if trait.symbol] - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=gene_symbol_list) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=gene_symbol_list) for trait in self.correlation_results: if trait.symbol and trait.symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[trait.symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) trait.tissue_corr = result[0] trait.tissue_pvalue = result[2] def do_tissue_correlation_for_all_traits(self, tissue_dataset_id=1): - #Gets tissue expression values for the primary trait + # Gets tissue expression values for the primary trait primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [self.this_trait.symbol]) + symbol_list=[self.this_trait.symbol]) if self.this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower()] + primary_trait_tissue_values = primary_trait_tissue_vals_dict[self.this_trait.symbol.lower( + )] #print("trait_gene_symbols: ", pf(trait_gene_symbols.values())) - corr_result_tissue_vals_dict= correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=list(self.trait_symbol_dict.values())) + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(self.trait_symbol_dict.values())) #print("corr_result_tissue_vals: ", pf(corr_result_tissue_vals_dict)) @@ -300,27 +339,30 @@ class CorrelationResults(object): tissue_corr_data = {} for trait, symbol in list(self.trait_symbol_dict.items()): if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower()] + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] result = correlation_functions.cal_zero_order_corr_for_tiss(primary_trait_tissue_values, - this_trait_tissue_values, - self.corr_method) + this_trait_tissue_values, + self.corr_method) tissue_corr_data[trait] = [symbol, result[0], result[2]] tissue_corr_data = collections.OrderedDict(sorted(list(tissue_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return tissue_corr_data def do_lit_correlation_for_trait_list(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) for trait in self.correlation_results: if trait.geneid: - trait.mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), trait.geneid) + trait.mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), trait.geneid) else: trait.mouse_gene_id = None @@ -348,13 +390,14 @@ class CorrelationResults(object): else: trait.lit_corr = 0 - def do_lit_correlation_for_all_traits(self): - input_trait_mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), self.this_trait.geneid) + input_trait_mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), self.this_trait.geneid) lit_corr_data = {} for trait, gene_id in list(self.trait_geneid_dict.items()): - mouse_gene_id = self.convert_to_mouse_gene_id(self.dataset.group.species.lower(), gene_id) + mouse_gene_id = self.convert_to_mouse_gene_id( + self.dataset.group.species.lower(), gene_id) if mouse_gene_id and str(mouse_gene_id).find(";") == -1: #print("gene_symbols:", input_trait_mouse_gene_id + " / " + mouse_gene_id) @@ -382,7 +425,7 @@ class CorrelationResults(object): lit_corr_data[trait] = [gene_id, 0] lit_corr_data = collections.OrderedDict(sorted(list(lit_corr_data.items()), - key=lambda t: -abs(t[1][1]))) + key=lambda t: -abs(t[1][1]))) return lit_corr_data @@ -422,6 +465,7 @@ class CorrelationResults(object): return mouse_gene_id + def get_sample_r_and_p_values(self, trait, target_samples): """Calculates the sample r (or rho) and p-value @@ -431,6 +475,9 @@ class CorrelationResults(object): """ + print("below here>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") + print(self.target_dataset.trait_data) + self.this_trait_vals = [] target_vals = [] for index, sample in enumerate(self.target_dataset.samplelist): @@ -440,21 +487,26 @@ class CorrelationResults(object): self.this_trait_vals.append(sample_value) target_vals.append(target_sample_value) - self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values(self.this_trait_vals, target_vals) + self.this_trait_vals, target_vals, num_overlap = corr_result_helpers.normalize_values( + self.this_trait_vals, target_vals) if num_overlap > 5: - #ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ + # ZS: 2015 could add biweight correlation, see http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3465711/ if self.corr_method == 'bicor': - sample_r, sample_p = do_bicor(self.this_trait_vals, target_vals) + sample_r, sample_p = do_bicor( + self.this_trait_vals, target_vals) elif self.corr_method == 'pearson': - sample_r, sample_p = scipy.stats.pearsonr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.pearsonr( + self.this_trait_vals, target_vals) else: - sample_r, sample_p = scipy.stats.spearmanr(self.this_trait_vals, target_vals) + sample_r, sample_p = scipy.stats.spearmanr( + self.this_trait_vals, target_vals) if numpy.isnan(sample_r): pass else: - self.correlation_data[trait] = [sample_r, sample_p, num_overlap] + self.correlation_data[trait] = [ + sample_r, sample_p, num_overlap] def process_samples(self, start_vars, sample_names, excluded_samples=None): if not excluded_samples: @@ -475,16 +527,18 @@ def do_bicor(this_trait_vals, target_trait_vals): r_library("WGCNA") r_bicor = ro.r["bicorAndPvalue"] # Map the bicorAndPvalue function - r_options(stringsAsFactors = False) + r_options(stringsAsFactors=False) this_vals = ro.Vector(this_trait_vals) target_vals = ro.Vector(target_trait_vals) - the_r, the_p, _fisher_transform, _the_t, _n_obs = [numpy.asarray(x) for x in r_bicor(x = this_vals, y = target_vals)] + the_r, the_p, _fisher_transform, _the_t, _n_obs = [ + numpy.asarray(x) for x in r_bicor(x=this_vals, y=target_vals)] return the_r, the_p -def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api = False): + +def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_api=False): results_list = [] for i, trait in enumerate(corr_results): if trait.view == False: @@ -493,7 +547,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap results_dict['index'] = i + 1 results_dict['trait_id'] = trait.name results_dict['dataset'] = trait.dataset.name - results_dict['hmac'] = hmac.data_hmac('{}:{}'.format(trait.name, trait.dataset.name)) + results_dict['hmac'] = hmac.data_hmac( + '{}:{}'.format(trait.name, trait.dataset.name)) if target_dataset.type == "ProbeSet": results_dict['symbol'] = trait.symbol results_dict['description'] = "N/A" @@ -544,7 +599,8 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap if bool(trait.authors): authors_list = trait.authors.split(',') if len(authors_list) > 6: - results_dict['authors_display'] = ", ".join(authors_list[:6]) + ", et al." + results_dict['authors_display'] = ", ".join( + authors_list[:6]) + ", et al." else: results_dict['authors_display'] = trait.authors if bool(trait.pubmed_id): @@ -574,85 +630,85 @@ def generate_corr_json(corr_results, this_trait, dataset, target_dataset, for_ap return json.dumps(results_list) + def get_header_fields(data_type, corr_method): if data_type == "ProbeSet": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Lit rho', - 'Tissue rho', - 'Tissue p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Lit rho', + 'Tissue rho', + 'Tissue p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Symbol', - 'Description', - 'Location', - 'Mean', - 'Sample r', - 'N', - 'Sample p(r)', - 'Lit r', - 'Tissue r', - 'Tissue p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Symbol', + 'Description', + 'Location', + 'Mean', + 'Sample r', + 'N', + 'Sample p(r)', + 'Lit r', + 'Tissue r', + 'Tissue p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] elif data_type == "Publish": if corr_method == "spearman": header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample rho', - 'N', - 'Sample p(rho)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample rho', + 'N', + 'Sample p(rho)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: header_fields = ['Index', - 'Record', - 'Abbreviation', - 'Description', - 'Mean', - 'Authors', - 'Year', - 'Sample r', - 'N', - 'Sample p(r)', - 'Max LRS', - 'Max LRS Location', - 'Additive Effect'] + 'Record', + 'Abbreviation', + 'Description', + 'Mean', + 'Authors', + 'Year', + 'Sample r', + 'N', + 'Sample p(r)', + 'Max LRS', + 'Max LRS Location', + 'Additive Effect'] else: if corr_method == "spearman": header_fields = ['Index', - 'ID', - 'Location', - 'Sample rho', - 'N', - 'Sample p(rho)'] + 'ID', + 'Location', + 'Sample rho', + 'N', + 'Sample p(rho)'] else: header_fields = ['Index', - 'ID', - 'Location', - 'Sample r', - 'N', - 'Sample p(r)'] + 'ID', + 'Location', + 'Sample r', + 'N', + 'Sample p(r)'] return header_fields - -- cgit v1.2.3 From e5d2ce8f29e43900977b967ec8cac715f544a2f0 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 25 Mar 2021 02:25:45 +0300 Subject: add code for calling gn3 correlation endpoint --- wqflask/wqflask/correlation/correlation_gn3_api.py | 131 ++++++++++++---- wqflask/wqflask/correlation/show_corr_results.py | 174 +++++++++------------ wqflask/wqflask/views.py | 5 +- 3 files changed, 177 insertions(+), 133 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 4cf6533c..7e269e41 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -1,20 +1,17 @@ """module that calls the gn3 api's to do the correlation """ -from base import data_set -from base.trait import create_trait -from base.trait import retrieve_sample_data +import json +import requests +from wqflask.wqflask.correlation import correlation_functions +from wqflask.base import data_set +from wqflask.base.trait import create_trait +from wqflask.base.trait import retrieve_sample_data +GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation" - - - -def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"): - import requests - from wqflask.correlation.correlation_gn3_api import compute_correlation - - cor_results = compute_correlation(start_vars) - +def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"): + """integration for integrating sample_r api correlation""" data = { "target_dataset": target_dataset, "target_samplelist": target_samplelist, @@ -29,33 +26,60 @@ def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, m data = results.json() - print(data) - return data -def process_samples(start_vars,sample_names,excluded_samples=None): +def get_tissue_correlation_input(this_trait, trait_symbol_dict): + """Gets tissue expression values for the primary trait and target tissues values""" + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) + + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] + + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) + + target_tissue_data = [] + for trait, symbol in list(trait_symbol_dict.items()): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] + + this_trait_data = {"trait_id": trait, + "tissue_values": this_trait_tissue_values} + + target_tissue_data.append(this_trait_data) + + primary_tissue_data = { + "this_id": "TT", + "tissue_values": primary_trait_tissue_values + + } + + return (primary_tissue_data, target_tissue_data) + + return None + + +def process_samples(start_vars, sample_names, excluded_samples=None): + """process samples method""" sample_data = {} if not excluded_samples: excluded_samples = () - sample_vals_dict = json.loads(start_vars["sample_vals"]) + sample_vals_dict = json.loads(start_vars["sample_vals"]) for sample in sample_names: if sample not in excluded_samples: - val = sample_val_dict[sample] + val = sample_vals_dict[sample] if not val.strip().lower() == "x": - sample_data[str(sample)]=float(value) + sample_data[str(sample)] = float(val) return sample_data -def create_fetch_dataset_data(dataset_name): - this_dataset = data_set.create_dataset(dataset_name=dataset_name) - - this_dataset.get_trait_data() - - def create_target_this_trait(start_vars): """this function prefetch required data for correlation""" @@ -66,12 +90,61 @@ def create_target_this_trait(start_vars): this_trait = create_trait(dataset=this_dataset, name=start_vars['trait_id']) + sample_data = process_samples(start_vars, this_dataset.group.samplelist) + # target_dataset.get_trait_data(list(self.sample_data.keys())) + this_trait = retrieve_sample_data(this_trait, this_dataset) - target_dataset.get_trait_data() + target_dataset.get_trait_data(list(sample_data.keys())) + + return (this_dataset, this_trait, target_dataset, sample_data) + + +def compute_correlation(start_vars, method="pearson"): + """compute correlation for to call gn3 api""" + + corr_type = start_vars['corr_type'] + + (this_dataset, this_trait, target_dataset, + sample_data) = create_target_this_trait(start_vars) + + # cor_results = compute_correlation(start_vars) + + method = start_vars['corr_sample_method'] + + corr_input_data = {} - return (this_dataset,this_trait,target_dataset) -def compute_correlation(start_vars): + if corr_type == "sample": + corr_input_data = { + "target_dataset": target_dataset.trait_data, + "target_samplelist": target_dataset.samplelist, + "trait_data": { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + } + + requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" + + elif corr_type == "tissue": + trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + primary_tissue_data, target_tissue_data = get_tissue_correlation_input( + this_trait, trait_symbol_dict) + + corr_input_data = { + "primary_tissue": primary_tissue_data, + "target_tissues": target_tissue_data + } - this_dataset, this_trait, target_dataset = create_target_this_trait( - start_vars=start_vars) + requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" + + else: + pass + # lit correlation/literature + # can fetch values in gn3 not set up in gn3 + + corr_results = requests.post(requests_url, json=corr_input_data) + + data = corr_results.json() + + return data diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index a817a4a4..50b3ba26 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -57,32 +57,6 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 - -def compute_sample_r(start_vars,target_dataset, trait_data, target_samplelist, method="pearson"): - import requests - from wqflask.correlation.correlation_gn3_api import compute_correlation - - # cor_results = compute_correlation(start_vars) - - data = { - "target_dataset": target_dataset, - "target_samplelist": target_samplelist, - "trait_data": { - "trait_sample_data": trait_data, - "trait_id": "HC_Q" - } - } - requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}" - - results = requests.post(requests_url, json=data) - - data = results.json() - - print(data) - - return data - - class CorrelationResults(object): def __init__(self, start_vars): # get trait list from db (database name) @@ -197,81 +171,78 @@ class CorrelationResults(object): trait, self.target_dataset.trait_data[trait]) elif self.corr_type == "sample": - - compute_sample_r(start_vars, - self.target_dataset.trait_data, self.sample_data, self.target_dataset.samplelist) - # for trait, values in list(self.target_dataset.trait_data.items()): - # self.get_sample_r_and_p_values(trait, values) - - # self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), - # key=lambda t: -abs(t[1][0]))) - - # # ZS: Convert min/max chromosome to an int for the location range option - # range_chr_as_int = None - # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): - # if 'loc_chr' in start_vars: - # if chr_info.name == self.location_chr: - # range_chr_as_int = order_id - - # for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): - # trait_object = create_trait( - # dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) - # if not trait_object: - # continue - - # chr_as_int = 0 - # for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): - # if self.location_type == "highest_lod": - # if chr_info.name == trait_object.locus_chr: - # chr_as_int = order_id - # else: - # if chr_info.name == trait_object.chr: - # chr_as_int = order_id - - # if (float(self.correlation_data[trait][0]) >= self.p_range_lower and - # float(self.correlation_data[trait][0]) <= self.p_range_upper): - - # if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): - # if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): - # continue - - # if range_chr_as_int != None and (chr_as_int != range_chr_as_int): - # continue - # if self.location_type == "highest_lod": - # if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)): - # continue - # if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)): - # continue - # else: - # if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)): - # continue - # if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)): - # continue - - # (trait_object.sample_r, - # trait_object.sample_p, - # trait_object.num_overlap) = self.correlation_data[trait] - - # # Set some sane defaults - # trait_object.tissue_corr = 0 - # trait_object.tissue_pvalue = 0 - # trait_object.lit_corr = 0 - # if self.corr_type == "tissue" and tissue_corr_data != None: - # trait_object.tissue_corr = tissue_corr_data[trait][1] - # trait_object.tissue_pvalue = tissue_corr_data[trait][2] - # elif self.corr_type == "lit": - # trait_object.lit_corr = lit_corr_data[trait][1] - - # self.correlation_results.append(trait_object) - - # if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": - # self.do_lit_correlation_for_trait_list() - - # if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": - # self.do_tissue_correlation_for_trait_list() - - # self.json_results = generate_corr_json( - # self.correlation_results, self.this_trait, self.dataset, self.target_dataset) + for trait, values in list(self.target_dataset.trait_data.items()): + self.get_sample_r_and_p_values(trait, values) + + self.correlation_data = collections.OrderedDict(sorted(list(self.correlation_data.items()), + key=lambda t: -abs(t[1][0]))) + + # ZS: Convert min/max chromosome to an int for the location range option + range_chr_as_int = None + for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): + if 'loc_chr' in start_vars: + if chr_info.name == self.location_chr: + range_chr_as_int = order_id + + for _trait_counter, trait in enumerate(list(self.correlation_data.keys())[:self.return_number]): + trait_object = create_trait( + dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False) + if not trait_object: + continue + + chr_as_int = 0 + for order_id, chr_info in list(self.dataset.species.chromosomes.chromosomes.items()): + if self.location_type == "highest_lod": + if chr_info.name == trait_object.locus_chr: + chr_as_int = order_id + else: + if chr_info.name == trait_object.chr: + chr_as_int = order_id + + if (float(self.correlation_data[trait][0]) >= self.p_range_lower and + float(self.correlation_data[trait][0]) <= self.p_range_upper): + + if (self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Publish") and bool(trait_object.mean): + if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr): + continue + + if range_chr_as_int != None and (chr_as_int != range_chr_as_int): + continue + if self.location_type == "highest_lod": + if (self.min_location_mb != None) and (float(trait_object.locus_mb) < float(self.min_location_mb)): + continue + if (self.max_location_mb != None) and (float(trait_object.locus_mb) > float(self.max_location_mb)): + continue + else: + if (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)): + continue + if (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)): + continue + + (trait_object.sample_r, + trait_object.sample_p, + trait_object.num_overlap) = self.correlation_data[trait] + + # Set some sane defaults + trait_object.tissue_corr = 0 + trait_object.tissue_pvalue = 0 + trait_object.lit_corr = 0 + if self.corr_type == "tissue" and tissue_corr_data != None: + trait_object.tissue_corr = tissue_corr_data[trait][1] + trait_object.tissue_pvalue = tissue_corr_data[trait][2] + elif self.corr_type == "lit": + trait_object.lit_corr = lit_corr_data[trait][1] + + self.correlation_results.append(trait_object) + + if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": + self.do_lit_correlation_for_trait_list() + + if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet": + self.do_tissue_correlation_for_trait_list() + + self.json_results = generate_corr_json( + self.correlation_results, self.this_trait, self.dataset, self.target_dataset) ############################################################################################################################################ @@ -465,7 +436,6 @@ class CorrelationResults(object): return mouse_gene_id - def get_sample_r_and_p_values(self, trait, target_samples): """Calculates the sample r (or rho) and p-value diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 2c0ba586..6ca9b23f 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -46,6 +46,7 @@ from wqflask.marker_regression import run_mapping from wqflask.marker_regression import display_mapping_results from wqflask.network_graph import network_graph from wqflask.correlation import show_corr_results +from wqflask.correlation.correlation_gn3_api import compute_correlation from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot from wqflask.wgcna import wgcna_analysis @@ -880,8 +881,8 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - template_vars = show_corr_results.CorrelationResults(request.form) - return render_template("correlation_page.html", **template_vars.__dict__) + correlation_results = compute_correlation(request.form) + return render_template("demo_correlation_page.html",correlation_results=correlation_results) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3 From 7a1e84cafdf02a1bcef4ddeb653d072b80a8deba Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 25 Mar 2021 02:27:38 +0300 Subject: add initial demo template page --- .../wqflask/templates/demo_correlation_page.html | 36 ++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 wqflask/wqflask/templates/demo_correlation_page.html diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html new file mode 100644 index 00000000..4d310051 --- /dev/null +++ b/wqflask/wqflask/templates/demo_correlation_page.html @@ -0,0 +1,36 @@ +{% extends "base.html" %} +{% block title %}Demo Correlation Results{% endblock %} +{% block css %} + + + + + + +{% endblock %} +{% block content %} +
+ {{correlation_results}} + + +
+{% endblock %} + +{% block js %} + + + + + + + + + + + + + +{% endblock %} -- cgit v1.2.3 From cf42f769ec4db2efaebca64c63454935cc28b2a3 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 25 Mar 2021 03:55:16 +0300 Subject: modify gn3 integration code --- wqflask/wqflask/correlation/correlation_gn3_api.py | 89 +++++++++------------- .../wqflask/templates/demo_correlation_page.html | 22 +----- 2 files changed, 38 insertions(+), 73 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 7e269e41..7e865bf3 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -10,59 +10,6 @@ from wqflask.base.trait import retrieve_sample_data GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation" -def compute_sample(target_dataset, trait_data, target_samplelist, method="pearson"): - """integration for integrating sample_r api correlation""" - data = { - "target_dataset": target_dataset, - "target_samplelist": target_samplelist, - "trait_data": { - "trait_sample_data": trait_data, - "trait_id": "HC_Q" - } - } - requests_url = f"http://127.0.0.1:8080/api/correlation/sample_x/{method}" - - results = requests.post(requests_url, json=data) - - data = results.json() - - return data - - -def get_tissue_correlation_input(this_trait, trait_symbol_dict): - """Gets tissue expression values for the primary trait and target tissues values""" - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=[this_trait.symbol]) - - if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( - )] - - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=list(trait_symbol_dict.values())) - - target_tissue_data = [] - for trait, symbol in list(trait_symbol_dict.items()): - if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( - )] - - this_trait_data = {"trait_id": trait, - "tissue_values": this_trait_tissue_values} - - target_tissue_data.append(this_trait_data) - - primary_tissue_data = { - "this_id": "TT", - "tissue_values": primary_trait_tissue_values - - } - - return (primary_tissue_data, target_tissue_data) - - return None - - def process_samples(start_vars, sample_names, excluded_samples=None): """process samples method""" sample_data = {} @@ -81,7 +28,7 @@ def process_samples(start_vars, sample_names, excluded_samples=None): def create_target_this_trait(start_vars): - """this function prefetch required data for correlation""" + """this function creates the required trait and target dataset for correlation""" this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) target_dataset = data_set.create_dataset( @@ -148,3 +95,37 @@ def compute_correlation(start_vars, method="pearson"): data = corr_results.json() return data + + +def get_tissue_correlation_input(this_trait, trait_symbol_dict): + """Gets tissue expression values for the primary trait and target tissues values""" + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) + + if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + )] + + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) + + target_tissue_data = [] + for trait, symbol in list(trait_symbol_dict.items()): + if symbol and symbol.lower() in corr_result_tissue_vals_dict: + this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( + )] + + this_trait_data = {"trait_id": trait, + "tissue_values": this_trait_tissue_values} + + target_tissue_data.append(this_trait_data) + + primary_tissue_data = { + "this_id": "TT", + "tissue_values": primary_trait_tissue_values + + } + + return (primary_tissue_data, target_tissue_data) + + return None diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html index 4d310051..ddcdf38d 100644 --- a/wqflask/wqflask/templates/demo_correlation_page.html +++ b/wqflask/wqflask/templates/demo_correlation_page.html @@ -10,27 +10,11 @@ {% endblock %} {% block content %}
- {{correlation_results}} + - -
-{% endblock %} -{% block js %} - - - - - - - - - - - - - + {% endblock %} -- cgit v1.2.3 From 7de35627a6dc3fa48a039c932be005ffe6c175c4 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 25 Mar 2021 04:02:49 +0300 Subject: fix import error --- wqflask/wqflask/correlation/correlation_gn3_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 7e865bf3..479bb0d8 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -1,11 +1,11 @@ """module that calls the gn3 api's to do the correlation """ import json import requests -from wqflask.wqflask.correlation import correlation_functions +from wqflask.correlation import correlation_functions -from wqflask.base import data_set -from wqflask.base.trait import create_trait -from wqflask.base.trait import retrieve_sample_data +from base import data_set +from base.trait import create_trait +from base.trait import retrieve_sample_data GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation" -- cgit v1.2.3 From da72efa86846179d8d2aa64cd7b06a894469dc85 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 25 Mar 2021 10:14:31 +0300 Subject: minor fix --- wqflask/wqflask/correlation/correlation_gn3_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 479bb0d8..f1137c0e 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -88,7 +88,7 @@ def compute_correlation(start_vars, method="pearson"): else: pass # lit correlation/literature - # can fetch values in gn3 not set up in gn3 + # to fetch values from the database corr_results = requests.post(requests_url, json=corr_input_data) -- cgit v1.2.3 From 08ddec9dcbaa1730d0b65b643aa5c99d1077d4d5 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sun, 28 Mar 2021 21:13:52 +0300 Subject: refactor correlation integration code --- wqflask/wqflask/correlation/correlation_gn3_api.py | 42 ++++++++++++---------- .../wqflask/templates/demo_correlation_page.html | 6 ++-- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index f1137c0e..1cd1b332 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -63,6 +63,7 @@ def compute_correlation(start_vars, method="pearson"): if corr_type == "sample": corr_input_data = { + "target": target_dataset, "target_dataset": target_dataset.trait_data, "target_samplelist": target_dataset.samplelist, "trait_data": { @@ -80,16 +81,17 @@ def compute_correlation(start_vars, method="pearson"): corr_input_data = { "primary_tissue": primary_tissue_data, - "target_tissues": target_tissue_data + "target_tissues_dict": target_tissue_data } requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" - else: - pass - # lit correlation/literature - # to fetch values from the database + elif corr_type == "lit": + (this_trait_geneid, geneid_dict, species) = do_lit_correlation( + this_trait, this_dataset, target_dataset) + requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" + corr_input_data = geneid_dict corr_results = requests.post(requests_url, json=corr_input_data) data = corr_results.json() @@ -97,6 +99,18 @@ def compute_correlation(start_vars, method="pearson"): return data +def do_lit_correlation(this_trait, this_dataset, target_dataset): + geneid_dict = this_dataset.retrieve_genes("GeneId") + species = this_dataset.group.species.lower() + + this_trait_geneid = this_trait.geneid + this_trait_gene_data = { + this_trait.name: this_trait_geneid + } + + return (this_trait_geneid, geneid_dict, species) + + def get_tissue_correlation_input(this_trait, trait_symbol_dict): """Gets tissue expression values for the primary trait and target tissues values""" primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( @@ -108,23 +122,15 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=list(trait_symbol_dict.values())) - - target_tissue_data = [] - for trait, symbol in list(trait_symbol_dict.items()): - if symbol and symbol.lower() in corr_result_tissue_vals_dict: - this_trait_tissue_values = corr_result_tissue_vals_dict[symbol.lower( - )] - - this_trait_data = {"trait_id": trait, - "tissue_values": this_trait_tissue_values} - - target_tissue_data.append(this_trait_data) - primary_tissue_data = { - "this_id": "TT", + "this_id": this_trait.name, "tissue_values": primary_trait_tissue_values } + target_tissue_data = { + "trait_symbol_dict": trait_symbol_dict, + "symbol_tissue_vals_dict": corr_result_tissue_vals_dict + } return (primary_tissue_data, target_tissue_data) diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html index ddcdf38d..a8651067 100644 --- a/wqflask/wqflask/templates/demo_correlation_page.html +++ b/wqflask/wqflask/templates/demo_correlation_page.html @@ -10,11 +10,11 @@ {% endblock %} {% block content %}
- - + {{correlation_results}} +
{% endblock %} -- cgit v1.2.3 From fcb93bef5ab230b948f83e0e77a1ef54b017aca1 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 8 Apr 2021 23:59:13 +0300 Subject: minor fix --- wqflask/wqflask/correlation/correlation_gn3_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 1cd1b332..c8d5347c 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -63,7 +63,6 @@ def compute_correlation(start_vars, method="pearson"): if corr_type == "sample": corr_input_data = { - "target": target_dataset, "target_dataset": target_dataset.trait_data, "target_samplelist": target_dataset.samplelist, "trait_data": { -- cgit v1.2.3 From 328b176628ed9db6c1c60590cb10f4cca212738a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 15 Apr 2021 06:10:28 +0300 Subject: change api port --- wqflask/wqflask/correlation/correlation_gn3_api.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index c8d5347c..8ee4a9b7 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -7,7 +7,7 @@ from base import data_set from base.trait import create_trait from base.trait import retrieve_sample_data -GN3_CORRELATION_API = "http://127.0.0.1:8080/api/correlation" +GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation" def process_samples(start_vars, sample_names, excluded_samples=None): @@ -30,6 +30,12 @@ def process_samples(start_vars, sample_names, excluded_samples=None): def create_target_this_trait(start_vars): """this function creates the required trait and target dataset for correlation""" + + print("creating the dataset and trait") + import time + + initial_time = time.time() + this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) target_dataset = data_set.create_dataset( dataset_name=start_vars['corr_dataset']) @@ -44,6 +50,11 @@ def create_target_this_trait(start_vars): target_dataset.get_trait_data(list(sample_data.keys())) + + time_taken = time.time() - initial_time + + print(f"the time taken to create dataset abnd trait is",time_taken) + return (this_dataset, this_trait, target_dataset, sample_data) @@ -91,6 +102,8 @@ def compute_correlation(start_vars, method="pearson"): requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" corr_input_data = geneid_dict + + print("Sending this request") corr_results = requests.post(requests_url, json=corr_input_data) data = corr_results.json() -- cgit v1.2.3 From 5a9a7a645510d1385def017adf2f956d61fa2329 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 16 Apr 2021 02:09:28 +0300 Subject: add demo template --- wqflask/wqflask/correlation/correlation_gn3_api.py | 3 + .../wqflask/templates/demo_correlation_page.html | 78 ++++++++++++++++++++-- wqflask/wqflask/views.py | 2 +- 3 files changed, 77 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 8ee4a9b7..b4480076 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -47,11 +47,14 @@ def create_target_this_trait(start_vars): # target_dataset.get_trait_data(list(self.sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) + print(f"Starting to creat the target dataset ") + dataset_start_time = time.time() target_dataset.get_trait_data(list(sample_data.keys())) time_taken = time.time() - initial_time + print(f"the time taken to create dataset is",time.time()-dataset_start_time) print(f"the time taken to create dataset abnd trait is",time_taken) diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html index a8651067..1900a0bd 100644 --- a/wqflask/wqflask/templates/demo_correlation_page.html +++ b/wqflask/wqflask/templates/demo_correlation_page.html @@ -10,11 +10,79 @@ {% endblock %} {% block content %}
- {{correlation_results}} - +
CORRELATION RESULTS
+
+

Trait_Name

+

Rho value

+

Num overlap

+

P value

+
+
+ {% for corr_result in correlation_results %} + {% for key,value in corr_result.items()%} +
+

trait_name_here

+ {%for o_key,o_value in value.items()%} +

{{o_value}}

+ {%endfor%} + {% endfor %} +
+ {% endfor %} +
+ + {% endblock %} + diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 6ca9b23f..072db466 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -882,7 +882,7 @@ def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) correlation_results = compute_correlation(request.form) - return render_template("demo_correlation_page.html",correlation_results=correlation_results) + return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20]) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3 From 33e03898ee733f18b29e54e202c217ba14921f48 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 17 Apr 2021 04:14:33 +0300 Subject: use gn3 lib --- bin/genenetwork2 | 3 +- wqflask/wqflask/correlation/correlation_gn3_api.py | 57 +++++++++++++++++----- .../wqflask/templates/demo_correlation_page.html | 2 +- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/bin/genenetwork2 b/bin/genenetwork2 index 5f4e0f9a..917d6549 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -154,7 +154,8 @@ if [ ! -d $R_LIBS_SITE ] ; then fi # We may change this one: -export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH +# export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH +PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/genenetwork3:$PYTHONPATH # Our UNIX TMPDIR defaults to /tmp - change this on a shared server if [ -z $TMPDIR ]; then diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index b4480076..c1d6132b 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -1,11 +1,17 @@ """module that calls the gn3 api's to do the correlation """ import json import requests +import time from wqflask.correlation import correlation_functions from base import data_set from base.trait import create_trait from base.trait import retrieve_sample_data +# gn3 lib +from gn3.computations.correlations import compute_all_sample_correlation +from gn3.computations.correlations import benchmark_compute_all_sample +from gn3.computations.correlations import map_shared_keys_to_values +from gn3.computations.correlations import compute_all_tissue_correlation GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation" @@ -30,7 +36,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None): def create_target_this_trait(start_vars): """this function creates the required trait and target dataset for correlation""" - print("creating the dataset and trait") import time @@ -52,11 +57,10 @@ def create_target_this_trait(start_vars): target_dataset.get_trait_data(list(sample_data.keys())) - time_taken = time.time() - initial_time - print(f"the time taken to create dataset is",time.time()-dataset_start_time) + print(f"the time taken to create dataset is", time.time()-dataset_start_time) - print(f"the time taken to create dataset abnd trait is",time_taken) + print(f"the time taken to create dataset abnd trait is", time_taken) return (this_dataset, this_trait, target_dataset, sample_data) @@ -76,16 +80,34 @@ def compute_correlation(start_vars, method="pearson"): corr_input_data = {} if corr_type == "sample": - corr_input_data = { - "target_dataset": target_dataset.trait_data, - "target_samplelist": target_dataset.samplelist, - "trait_data": { - "trait_sample_data": sample_data, - "trait_id": start_vars["trait_id"] - } + # corr_input_data = { + # "target_dataset": target_dataset.trait_data, + # "target_samplelist": target_dataset.samplelist, + # "trait_data": { + # "trait_sample_data": sample_data, + # "trait_id": start_vars["trait_id"] + # } + # } + + + + this_trait_data = { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] } - requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" + initial_time = time.time() + print("Calling sample correlation") + results = map_shared_keys_to_values( + target_dataset.samplelist, target_dataset.trait_data) + correlation_results = compute_all_sample_correlation(corr_method=method, + this_trait=this_trait_data, + target_dataset=results) + + print("Time taken is>>>>",time.time()-initial_time) + + # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" + return correlation_results elif corr_type == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") @@ -96,8 +118,17 @@ def compute_correlation(start_vars, method="pearson"): "primary_tissue": primary_tissue_data, "target_tissues_dict": target_tissue_data } + print("Calling tissue correlation") + initial_time = time.time() + correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], + target_tissues_data=corr_input_data["target_tissues_dict"], + corr_method=method) + + time_taken = time.time() + print("Time taken is ??????",time_taken-initial_time) - requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" + # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" + return correlation_results elif corr_type == "lit": (this_trait_geneid, geneid_dict, species) = do_lit_correlation( diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html index 1900a0bd..d2979f9d 100644 --- a/wqflask/wqflask/templates/demo_correlation_page.html +++ b/wqflask/wqflask/templates/demo_correlation_page.html @@ -21,7 +21,7 @@ {% for corr_result in correlation_results %} {% for key,value in corr_result.items()%}
-

trait_name_here

+

{{key}}

{%for o_key,o_value in value.items()%}

{{o_value}}

{%endfor%} -- cgit v1.2.3 From ba2fa2025bdc381346afc8ec3203f229ed3551d6 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 17 Apr 2021 13:43:44 +0300 Subject: refactoring fetching of data --- wqflask/wqflask/correlation/correlation_gn3_api.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index c1d6132b..75bd5561 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -48,20 +48,17 @@ def create_target_this_trait(start_vars): this_trait = create_trait(dataset=this_dataset, name=start_vars['trait_id']) - sample_data = process_samples(start_vars, this_dataset.group.samplelist) + # target_dataset.get_trait_data(list(self.sample_data.keys())) - this_trait = retrieve_sample_data(this_trait, this_dataset) + # this_trait = retrieve_sample_data(this_trait, this_dataset) print(f"Starting to creat the target dataset ") dataset_start_time = time.time() + sample_data = () - target_dataset.get_trait_data(list(sample_data.keys())) + time_taken = time.time() - initial_time - print(f"the time taken to create dataset is", time.time()-dataset_start_time) - - print(f"the time taken to create dataset abnd trait is", time_taken) - return (this_dataset, this_trait, target_dataset, sample_data) @@ -89,6 +86,10 @@ def compute_correlation(start_vars, method="pearson"): # } # } + sample_data = process_samples(start_vars, this_dataset.group.samplelist) + target_dataset.get_trait_data(list(sample_data.keys())) + this_trait = retrieve_sample_data(this_trait, this_dataset) + this_trait_data = { @@ -111,8 +112,10 @@ def compute_correlation(start_vars, method="pearson"): elif corr_type == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") + time_to_retrieve = time.time() primary_tissue_data, target_tissue_data = get_tissue_correlation_input( this_trait, trait_symbol_dict) + print("Time taken to retrieve this is",time.time()-time_to_retrieve) corr_input_data = { "primary_tissue": primary_tissue_data, -- cgit v1.2.3 From 50c0ee93a59eecd40a6fbd19139671c94003c21b Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 19 Apr 2021 00:24:36 +0300 Subject: fix for correlation_demo template --- wqflask/wqflask/correlation/correlation_gn3_api.py | 53 ++++++++++------------ .../wqflask/templates/demo_correlation_page.html | 23 +++++++++- 2 files changed, 44 insertions(+), 32 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 75bd5561..ba606b92 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -9,9 +9,10 @@ from base.trait import create_trait from base.trait import retrieve_sample_data # gn3 lib from gn3.computations.correlations import compute_all_sample_correlation -from gn3.computations.correlations import benchmark_compute_all_sample from gn3.computations.correlations import map_shared_keys_to_values from gn3.computations.correlations import compute_all_tissue_correlation +from gn3.computations.correlations import compute_all_lit_correlation +from gn3.db_utils import database_connector GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation" @@ -36,10 +37,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None): def create_target_this_trait(start_vars): """this function creates the required trait and target dataset for correlation""" - print("creating the dataset and trait") - import time - - initial_time = time.time() this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) target_dataset = data_set.create_dataset( @@ -48,17 +45,10 @@ def create_target_this_trait(start_vars): this_trait = create_trait(dataset=this_dataset, name=start_vars['trait_id']) - # target_dataset.get_trait_data(list(self.sample_data.keys())) # this_trait = retrieve_sample_data(this_trait, this_dataset) - print(f"Starting to creat the target dataset ") - dataset_start_time = time.time() sample_data = () - - - - time_taken = time.time() - initial_time return (this_dataset, this_trait, target_dataset, sample_data) @@ -77,6 +67,7 @@ def compute_correlation(start_vars, method="pearson"): corr_input_data = {} if corr_type == "sample": + initial_time = time.time() # corr_input_data = { # "target_dataset": target_dataset.trait_data, # "target_samplelist": target_dataset.samplelist, @@ -85,50 +76,44 @@ def compute_correlation(start_vars, method="pearson"): # "trait_id": start_vars["trait_id"] # } # } - - sample_data = process_samples(start_vars, this_dataset.group.samplelist) + sample_data = process_samples( + start_vars, this_dataset.group.samplelist) target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) - + print("Creating dataset and trait took",time.time()-initial_time) this_trait_data = { "trait_sample_data": sample_data, "trait_id": start_vars["trait_id"] } - initial_time = time.time() - print("Calling sample correlation") results = map_shared_keys_to_values( target_dataset.samplelist, target_dataset.trait_data) correlation_results = compute_all_sample_correlation(corr_method=method, this_trait=this_trait_data, target_dataset=results) - print("Time taken is>>>>",time.time()-initial_time) + print("doing sample correlation took",time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" return correlation_results elif corr_type == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") - time_to_retrieve = time.time() primary_tissue_data, target_tissue_data = get_tissue_correlation_input( this_trait, trait_symbol_dict) - print("Time taken to retrieve this is",time.time()-time_to_retrieve) corr_input_data = { "primary_tissue": primary_tissue_data, "target_tissues_dict": target_tissue_data } - print("Calling tissue correlation") initial_time = time.time() correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], target_tissues_data=corr_input_data["target_tissues_dict"], corr_method=method) + print("time taken for compute tissue is",time.time()-initial_time) - time_taken = time.time() - print("Time taken is ??????",time_taken-initial_time) # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" return correlation_results @@ -137,15 +122,23 @@ def compute_correlation(start_vars, method="pearson"): (this_trait_geneid, geneid_dict, species) = do_lit_correlation( this_trait, this_dataset, target_dataset) - requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" - corr_input_data = geneid_dict + conn, _cursor_object = database_connector() + initial_time = time.time() + with conn: - print("Sending this request") - corr_results = requests.post(requests_url, json=corr_input_data) + lit_corr_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) - data = corr_results.json() + return lit_corr_results + print("the time taken is",time.time()-initial_time) + # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" + # corr_input_data = geneid_dict + # corr_results = requests.post(requests_url, json=corr_input_data) - return data + # data = corr_results.json() + + # return data def do_lit_correlation(this_trait, this_dataset, target_dataset): @@ -164,11 +157,11 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): """Gets tissue expression values for the primary trait and target tissues values""" primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=[this_trait.symbol]) - if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( )] + time_to_to_fetch_all = time.time() corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=list(trait_symbol_dict.values())) primary_tissue_data = { diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html index d2979f9d..67e3c57c 100644 --- a/wqflask/wqflask/templates/demo_correlation_page.html +++ b/wqflask/wqflask/templates/demo_correlation_page.html @@ -22,9 +22,28 @@ {% for key,value in corr_result.items()%}

{{key}}

- {%for o_key,o_value in value.items()%} + + {% if "corr_coeffient" in value %} +

{{value["corr_coeffient"]}}

+ {%elif "tissue_corr" in value %} +

{{value["tissue_corr"]}}

+ {%elif "lit_corr" in value %} + {{value["lit_corr"]}} + {% endif %} + {%if "tissue_number" in value %} +
{{value["tissue_number"]}}
+ {%elif "num_overlap" in value %} +

{{value["num_overlap"]}}

+ {% endif %} +

{{value["p_value"]}}

+ + + + + {% endfor %}
{% endfor %} -- cgit v1.2.3 From 34e4933de5a1cd444abe618fcfd93b424bf3442e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 20 Apr 2021 01:38:26 +0300 Subject: refactor code for iterating mrna tissue data --- wqflask/base/mrna_assay_tissue_data.py | 39 +++++++++++++++++++--- .../wqflask/correlation/correlation_functions.py | 6 ++-- wqflask/wqflask/correlation/correlation_gn3_api.py | 24 +++++++------ 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index f1929518..0220d73b 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -6,6 +6,7 @@ from utility import db_tools from utility import Bunch from utility.db_tools import escape +from gn3.db_utils import database_connector from utility.logger import getLogger @@ -44,16 +45,42 @@ class MrnaAssayTissueData(object): and t.Mean = x.maxmean; '''.format(in_clause) - results = g.db.execute(query).fetchall() - lower_symbols = [] + # lower_symbols = [] + lower_symbols = {} for gene_symbol in gene_symbols: + # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: - lower_symbols.append(gene_symbol.lower()) - + lower_symbols[gene_symbol.lower()] = True + + import time + # initial_time = time.time() + # conn,cursor = database_connector() + # cursor.execute(query) + # for result in cursor.fetchall(): + # symbol = result[0] + # self.data[symbol].gene_id = result[1] + # self.data[symbol].data_id = result[2] + # self.data[symbol].chr = result[3] + # self.data[symbol].mb = result[4] + # self.data[symbol].description = result[5] + # self.data[symbol].probe_target_description = result[6] + + + # print("my loop takes>>>>",time.time()-initial_time) + # conn.close() + # r + + # takes 5 seconds + initial_time = time.time() + results = list(g.db.execute(query).fetchall()) for result in results: symbol = result[0] - if symbol.lower() in lower_symbols: + # if symbol is not None + # exists = lower_symbols.get(symbol.lower()) + # if symbol.lower() in lower_symbols: + if symbol is not None and lower_symbols.get(symbol.lower()): + symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId @@ -62,6 +89,7 @@ class MrnaAssayTissueData(object): self.data[symbol].mb = result.Mb self.data[symbol].description = result.description self.data[symbol].probe_target_description = result.Probe_Target_Description + print("time taken in the loop is",time.time()-initial_time) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -82,6 +110,7 @@ class MrnaAssayTissueData(object): WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index fd7691d4..af1d6060 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears def get_trait_symbol_and_tissue_values(symbol_list=None): tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) - - if len(tissue_data.gene_symbols): - return tissue_data.get_symbol_values_pairs() + if len(tissue_data.gene_symbols) >0: + results = tissue_data.get_symbol_values_pairs() + return results diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index ba606b92..e7394647 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -12,6 +12,7 @@ from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import map_shared_keys_to_values from gn3.computations.correlations import compute_all_tissue_correlation from gn3.computations.correlations import compute_all_lit_correlation +from gn3.computations.correlations import experimental_compute_all_tissue_correlation from gn3.db_utils import database_connector GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation" @@ -37,7 +38,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None): def create_target_this_trait(start_vars): """this function creates the required trait and target dataset for correlation""" - this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) target_dataset = data_set.create_dataset( dataset_name=start_vars['corr_dataset']) @@ -81,7 +81,7 @@ def compute_correlation(start_vars, method="pearson"): target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) - print("Creating dataset and trait took",time.time()-initial_time) + print("Creating dataset and trait took", time.time()-initial_time) this_trait_data = { "trait_sample_data": sample_data, @@ -94,7 +94,7 @@ def compute_correlation(start_vars, method="pearson"): this_trait=this_trait_data, target_dataset=results) - print("doing sample correlation took",time.time()-initial_time) + print("doing sample correlation took", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" return correlation_results @@ -109,11 +109,16 @@ def compute_correlation(start_vars, method="pearson"): "target_tissues_dict": target_tissue_data } initial_time = time.time() - correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], - target_tissues_data=corr_input_data["target_tissues_dict"], - corr_method=method) - print("time taken for compute tissue is",time.time()-initial_time) - + correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], + target_tissues_data=corr_input_data[ + "target_tissues_dict"], + corr_method=method) + print("correlation y took", time.time()-initial_time) + # initial_time = time.time() + # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], + # target_tissues_data=corr_input_data["target_tissues_dict"], + # corr_method=method) + # print("time taken for compute tissue is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" return correlation_results @@ -131,7 +136,7 @@ def compute_correlation(start_vars, method="pearson"): species=species, gene_id=this_trait_geneid) return lit_corr_results - print("the time taken is",time.time()-initial_time) + print("the time taken is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" # corr_input_data = geneid_dict # corr_results = requests.post(requests_url, json=corr_input_data) @@ -161,7 +166,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( )] - time_to_to_fetch_all = time.time() corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=list(trait_symbol_dict.values())) primary_tissue_data = { -- cgit v1.2.3 From 1b0566d7c9779b979d20c350f66d5628fb55eba6 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 23 Apr 2021 23:22:46 +0300 Subject: debugging for fetching probe data --- wqflask/base/data_set.py | 51 ++++++++++++++++++++-- wqflask/wqflask/correlation/correlation_gn3_api.py | 2 +- wqflask/wqflask/views.py | 3 ++ 3 files changed, 51 insertions(+), 5 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 178234fe..468c4da0 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -115,7 +115,8 @@ Publish or ProbeSet. E.g. except: pass - self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) + self.redis_instance.set( + "dataset_structure", json.dumps(self.datasets)) def set_dataset_key(self, t, name): """If name is not in the object's dataset dictionary, set it, and update @@ -154,10 +155,12 @@ Publish or ProbeSet. E.g. if t in ['pheno', 'other_pheno']: group_name = name.replace("Publish", "") - results = g.db.execute(sql_query_mapping[t].format(group_name)).fetchone() + results = g.db.execute( + sql_query_mapping[t].format(group_name)).fetchone() if results: self.datasets[name] = dataset_name_mapping[t] - self.redis_instance.set("dataset_structure", json.dumps(self.datasets)) + self.redis_instance.set( + "dataset_structure", json.dumps(self.datasets)) return True return None @@ -169,7 +172,8 @@ Publish or ProbeSet. E.g. # This has side-effects, with the end result being a truth-y value if(self.set_dataset_key(t, name)): break - return self.datasets.get(name, None) # Return None if name has not been set + # Return None if name has not been set + return self.datasets.get(name, None) # Do the intensive work at startup one time only @@ -651,6 +655,43 @@ class DataSet(object): "Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass + def fetch_probe_trait_data(self, sample_list=None): + if sample_list: + self.samplelist = sample_list + else: + self.samplelist = self.group.samplelist + + if self.group.parlist != None and self.group.f1list != None: + if (self.group.parlist + self.group.f1list) in self.samplelist: + self.samplelist += self.group.parlist + self.group.f1list + + query = """ + SELECT Strain.Name, Strain.Id FROM Strain, Species + WHERE Strain.Name IN {} + and Strain.SpeciesId=Species.Id + and Species.name = '{}' + """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) + logger.sql(query) + results = dict(g.db.execute(query).fetchall()) + sample_ids = [results[item] for item in self.samplelist] + + query = """SELECT * from ProbeSetData WHERE Id in ( SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and ProbeSetFreeze.Name = 'HC_M2_0606_P' and ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id ) and StrainId in ({})""".format( + ",".join(str(sample_id) for sample_id in sample_ids)) + + results = g.db.execute(query).fetchall() + + # with conn: + # cursor = conn.cursor() + # cursor.execute(query) + # results = cursor.fetchall() + trait_data = {} + for trait_id, StrainId, value in results: + if trait_id in trait_data: + trait_data[trait_id].append(value) + else: + trait_data[trait_id] = [value] + self.trait_data = trait_data + def get_trait_data(self, sample_list=None): if sample_list: self.samplelist = sample_list @@ -670,6 +711,7 @@ class DataSet(object): logger.sql(query) results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] + print("the number of sample ids are", len(sample_ids)) # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks @@ -720,6 +762,7 @@ class DataSet(object): trait_sample_data.append(results) trait_count = len(trait_sample_data[0]) + print("the trait count is >>>", trait_count) self.trait_data = collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index e7394647..51bf5fb5 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -78,7 +78,7 @@ def compute_correlation(start_vars, method="pearson"): # } sample_data = process_samples( start_vars, this_dataset.group.samplelist) - target_dataset.get_trait_data(list(sample_data.keys())) + target_dataset.fetch_probe_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) print("Creating dataset and trait took", time.time()-initial_time) diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 072db466..2c239425 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -881,7 +881,10 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) + import time + initial_time = time.time() correlation_results = compute_correlation(request.form) + print(">>>>Time taken by this endpoint",time.time()-initial_time) return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20]) @app.route("/corr_matrix", methods=('POST',)) -- cgit v1.2.3 From 7556f8a5dfc4c98bc0f0c8241592acec22b65102 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 26 Apr 2021 15:42:07 +0300 Subject: test for probe-type sample and tissue --- wqflask/wqflask/correlation/correlation_gn3_api.py | 71 +++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 51bf5fb5..c945f699 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -52,8 +52,64 @@ def create_target_this_trait(start_vars): return (this_dataset, this_trait, target_dataset, sample_data) +def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars): + sample_data = process_samples( + start_vars, this_dataset.group.samplelist) + target_dataset.get_trait_data(list(sample_data.keys())) + + this_trait = retrieve_sample_data(this_trait, this_dataset) + + this_trait_data = { + "trait_sample_data": sample_data, + "trait_id": start_vars["trait_id"] + } + # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results]) + # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[ + # 0] in corr_results_traits, target_dataset_data)) + results = map_shared_keys_to_values( + target_dataset.samplelist, target_dataset.trait_data) + correlation_results = compute_all_sample_correlation(corr_method="pearson", + this_trait=this_trait_data, + target_dataset=results) + + + return correlation_results + + +def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait): + # # print(corr_results[0])-- + # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}] + + print("creating trait_lists") + # corr_results = corr_results[0::] + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + print("finished creating trait_list") + + traits_symbol_dict = this_dataset.retrieve_genes("Symbol") + print("Retrieved symbol dict") + print("creating dict here>>>>>>>>>") + import time + init_time = time.time() + traits_symbol_dict = dict({trait_name: symbol for ( + trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) + print("time taken to create this max dict is>>>>", time.time()-init_time) + print("finished creatinf the dict") + print("Fetching tissue datas") + primary_tissue_data, target_tissue_data = get_tissue_correlation_input( + this_trait, traits_symbol_dict) + print("finihsed>>>>>>>>>>>>>>>>>>") + print("Calling experimental_compute_all_tissue_correlation") + corr_results = experimental_compute_all_tissue_correlation( + primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson") + # print('finished calling this tissue reuslts',corr_results) + + return corr_results + + def compute_correlation(start_vars, method="pearson"): """compute correlation for to call gn3 api""" + import time corr_type = start_vars['corr_type'] @@ -67,6 +123,7 @@ def compute_correlation(start_vars, method="pearson"): corr_input_data = {} if corr_type == "sample": + import time initial_time = time.time() # corr_input_data = { # "target_dataset": target_dataset.trait_data, @@ -78,7 +135,7 @@ def compute_correlation(start_vars, method="pearson"): # } sample_data = process_samples( start_vars, this_dataset.group.samplelist) - target_dataset.fetch_probe_trait_data(list(sample_data.keys())) + target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) print("Creating dataset and trait took", time.time()-initial_time) @@ -94,8 +151,15 @@ def compute_correlation(start_vars, method="pearson"): this_trait=this_trait_data, target_dataset=results) + print("computedd>>>>>>>>>>>>>") + print("doing sample correlation took", time.time()-initial_time) + other_results_time = time.time() + other_results = tissue_for_trait_lists( + correlation_results, this_dataset, target_dataset, this_trait) + print(">>>time taken for this is", time.time()-other_results_time) + # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" return correlation_results @@ -121,6 +185,9 @@ def compute_correlation(start_vars, method="pearson"): # print("time taken for compute tissue is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" + + sample_results = sample_for_trait_lists( + correlation_results, target_dataset, this_trait, this_dataset, start_vars) return correlation_results elif corr_type == "lit": @@ -148,6 +215,8 @@ def compute_correlation(start_vars, method="pearson"): def do_lit_correlation(this_trait, this_dataset, target_dataset): geneid_dict = this_dataset.retrieve_genes("GeneId") + # + print("CALLING THE LIT CORRELATION HERE") species = this_dataset.group.species.lower() this_trait_geneid = this_trait.geneid -- cgit v1.2.3 From 067d27460965aaf1ceaa863a315a0c7dbc47ae02 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 26 Apr 2021 17:05:06 +0300 Subject: fix:remove debug statements and commented code --- wqflask/base/mrna_assay_tissue_data.py | 25 --------- wqflask/wqflask/correlation/correlation_gn3_api.py | 60 +++------------------- 2 files changed, 8 insertions(+), 77 deletions(-) diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index 0220d73b..5a64afb2 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -52,33 +52,9 @@ class MrnaAssayTissueData(object): # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: lower_symbols[gene_symbol.lower()] = True - - import time - # initial_time = time.time() - # conn,cursor = database_connector() - # cursor.execute(query) - # for result in cursor.fetchall(): - # symbol = result[0] - # self.data[symbol].gene_id = result[1] - # self.data[symbol].data_id = result[2] - # self.data[symbol].chr = result[3] - # self.data[symbol].mb = result[4] - # self.data[symbol].description = result[5] - # self.data[symbol].probe_target_description = result[6] - - - # print("my loop takes>>>>",time.time()-initial_time) - # conn.close() - # r - - # takes 5 seconds - initial_time = time.time() results = list(g.db.execute(query).fetchall()) for result in results: symbol = result[0] - # if symbol is not None - # exists = lower_symbols.get(symbol.lower()) - # if symbol.lower() in lower_symbols: if symbol is not None and lower_symbols.get(symbol.lower()): symbol = symbol.lower() @@ -89,7 +65,6 @@ class MrnaAssayTissueData(object): self.data[symbol].mb = result.Mb self.data[symbol].description = result.description self.data[symbol].probe_target_description = result.Probe_Target_Description - print("time taken in the loop is",time.time()-initial_time) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index c945f699..3c21a850 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -63,9 +63,6 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase "trait_sample_data": sample_data, "trait_id": start_vars["trait_id"] } - # trait_lists = dict([(list(corr_result)[0],True) for corr_result in corr_results]) - # target_dataset.trait_data =list(filter(lambda dict_obj: dict_obj.keys()[ - # 0] in corr_results_traits, target_dataset_data)) results = map_shared_keys_to_values( target_dataset.samplelist, target_dataset.trait_data) correlation_results = compute_all_sample_correlation(corr_method="pearson", @@ -77,33 +74,15 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait): - # # print(corr_results[0])-- - # [{"awsdsd_at": {'corr_coeffient': 0.49714692782257336, 'p_value': 1.872077762359228e-05, 'num_overlap': 67}}] - - print("creating trait_lists") - # corr_results = corr_results[0::] trait_lists = dict([(list(corr_result)[0], True) for corr_result in corr_results]) - print("finished creating trait_list") - traits_symbol_dict = this_dataset.retrieve_genes("Symbol") - print("Retrieved symbol dict") - print("creating dict here>>>>>>>>>") - import time - init_time = time.time() traits_symbol_dict = dict({trait_name: symbol for ( trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) - print("time taken to create this max dict is>>>>", time.time()-init_time) - print("finished creatinf the dict") - print("Fetching tissue datas") primary_tissue_data, target_tissue_data = get_tissue_correlation_input( this_trait, traits_symbol_dict) - print("finihsed>>>>>>>>>>>>>>>>>>") - print("Calling experimental_compute_all_tissue_correlation") corr_results = experimental_compute_all_tissue_correlation( primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson") - # print('finished calling this tissue reuslts',corr_results) - return corr_results @@ -123,22 +102,14 @@ def compute_correlation(start_vars, method="pearson"): corr_input_data = {} if corr_type == "sample": - import time - initial_time = time.time() - # corr_input_data = { - # "target_dataset": target_dataset.trait_data, - # "target_samplelist": target_dataset.samplelist, - # "trait_data": { - # "trait_sample_data": sample_data, - # "trait_id": start_vars["trait_id"] - # } - # } + sample_data = process_samples( start_vars, this_dataset.group.samplelist) + initial_time = time.time() target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) + print("Creating target dataset and trait took", time.time()-initial_time) - print("Creating dataset and trait took", time.time()-initial_time) this_trait_data = { "trait_sample_data": sample_data, @@ -151,15 +122,9 @@ def compute_correlation(start_vars, method="pearson"): this_trait=this_trait_data, target_dataset=results) - print("computedd>>>>>>>>>>>>>") - print("doing sample correlation took", time.time()-initial_time) - - other_results_time = time.time() - other_results = tissue_for_trait_lists( - correlation_results, this_dataset, target_dataset, this_trait) - print(">>>time taken for this is", time.time()-other_results_time) - + # other_results = tissue_for_trait_lists( + # correlation_results, this_dataset, target_dataset, this_trait) # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" return correlation_results @@ -177,17 +142,9 @@ def compute_correlation(start_vars, method="pearson"): target_tissues_data=corr_input_data[ "target_tissues_dict"], corr_method=method) - print("correlation y took", time.time()-initial_time) - # initial_time = time.time() - # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], - # target_tissues_data=corr_input_data["target_tissues_dict"], - # corr_method=method) - # print("time taken for compute tissue is", time.time()-initial_time) - - # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" - - sample_results = sample_for_trait_lists( - correlation_results, target_dataset, this_trait, this_dataset, start_vars) + print("computing tissue took >>>>", time.time()-initial_time) + # sample_results = sample_for_trait_lists( + # correlation_results, target_dataset, this_trait, this_dataset, start_vars) return correlation_results elif corr_type == "lit": @@ -203,7 +160,6 @@ def compute_correlation(start_vars, method="pearson"): species=species, gene_id=this_trait_geneid) return lit_corr_results - print("the time taken is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" # corr_input_data = geneid_dict # corr_results = requests.post(requests_url, json=corr_input_data) -- cgit v1.2.3 From ac9be3f74e005e95a057f2c49baa7822d05f1ece Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Wed, 28 Apr 2021 08:46:53 +0300 Subject: minor fixes for correlation --- bin/genenetwork2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/genenetwork2 b/bin/genenetwork2 index 917d6549..f73f235c 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -155,7 +155,7 @@ fi # We may change this one: # export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH -PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/genenetwork3:$PYTHONPATH +PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/project/genenetwork3:$PYTHONPATH # Our UNIX TMPDIR defaults to /tmp - change this on a shared server if [ -z $TMPDIR ]; then -- cgit v1.2.3 From 44bcda38241e06a27c386f612d3fc2bae96a1924 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Fri, 30 Apr 2021 02:45:26 +0300 Subject: add template for correlation result --- .../wqflask/templates/test_correlation_page.html | 140 +++++++++++++++++++++ wqflask/wqflask/views.py | 2 +- 2 files changed, 141 insertions(+), 1 deletion(-) create mode 100644 wqflask/wqflask/templates/test_correlation_page.html diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html new file mode 100644 index 00000000..051d84db --- /dev/null +++ b/wqflask/wqflask/templates/test_correlation_page.html @@ -0,0 +1,140 @@ +{% extends "base.html" %} +{% block title %}Correlation Results{% endblock %} +{% block css %} + + + + + + + + +{% endblock %} + +{% block content %} + +
+

Correlation Results for Dataset_name against trait_name for the top allResults

+
+ + + + + + + + + + +
indextrait_nameSample rSample p(r)N
+ +{% endblock %} + +{% block js %} + + + + + + + + + + + + + + + + +{% endblock %} \ No newline at end of file diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 2c239425..3d4376e2 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -885,7 +885,7 @@ def corr_compute_page(): initial_time = time.time() correlation_results = compute_correlation(request.form) print(">>>>Time taken by this endpoint",time.time()-initial_time) - return render_template("demo_correlation_page.html",correlation_results=correlation_results[1:20]) + return render_template("test_correlation_page.html",correlation_results=correlation_results[0:50]) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3 From 8637c4f0487117c43be629b8bd14e51c48e5fbcf Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 1 May 2021 00:12:00 +0300 Subject: add toggle for columns --- .../wqflask/templates/test_correlation_page.html | 72 ++++++++++++---------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html index 051d84db..40d9836c 100644 --- a/wqflask/wqflask/templates/test_correlation_page.html +++ b/wqflask/wqflask/templates/test_correlation_page.html @@ -16,6 +16,8 @@ .trait_col { font-weight:bolder; text-align: center; + color:#036ffc; + /*font-size: 1.1em;*/ } table th { font-weight: bolder; @@ -27,17 +29,32 @@ .correlation-title h3 span { font-weight: bolder; } + .header-toggle-vis { + padding:10px 5px; + } + .header-toggle-vis button { + border-radius: 5px; + + } {% endblock %} {% block content %}
-

Correlation Results for Dataset_name against trait_name for the top allResults

+

Correlation Results for Dataset_name against trait_name for the top all Results

+
+
+ + + + +
+ @@ -83,9 +100,10 @@ {% endblock %} \ No newline at end of file -- cgit v1.2.3 From 05acf00467b9338072cde7556dc7b0ca130242c9 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 1 May 2021 01:14:30 +0300 Subject: add default values for empt columns --- .../wqflask/templates/test_correlation_page.html | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html index 40d9836c..be9ec226 100644 --- a/wqflask/wqflask/templates/test_correlation_page.html +++ b/wqflask/wqflask/templates/test_correlation_page.html @@ -45,6 +45,7 @@

Correlation Results for Dataset_name against trait_name for the top all Results

+

Toggle Columns

@@ -60,6 +61,8 @@
+ +
index trait_name Sample r Sample r Sample p(r) NTissue rhoLit rho
@@ -85,8 +88,10 @@ // document.querySelector(".content").innerHTML =correlationResults // parse the data let counter = 0; + let corr_type = "tissue"; correlationResults =correlationResults.map((trait_object)=>{ let trait_name = Object.keys(trait_object)[0] + let new_dict = { "index":counter, "trait_name":trait_name, @@ -94,7 +99,9 @@ } counter++; return new_dict; - }) + }) + +console.log(correlationResults) @@ -102,13 +109,15 @@ $(document).ready(function() { let table = $('#example').DataTable( { "data": correlationResults, - "columns": [ - {"data":null,"width":"25px"}, + "columns": [ + {"data":corr_type=="sample"?null:"fd","width":"25px"}, { "data": "index","width":"120px","title":"Index" }, { "data": "trait_name","title":"TraitName"}, - { "data": "corr_coeffient"}, - { "data": "p_value"}, - { "data": "num_overlap"} + { "data": "corr_coeffient","defaultContent": "--"}, + { "data": "p_value","defaultContent":"--"}, + { "data": "num_overlap","defaultContent":"--"}, + {"data":"tissue_corr","defaultContent":"--","title":"Tissue rho"}, + {"data":"lit_corr","defaultContent":"--","title":"Lit rho"} ], "columnDefs": [ { -- cgit v1.2.3 From ba2253944a4752f0413c57679f05b9c05cbd9bea Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 1 May 2021 01:34:13 +0300 Subject: modify column for p_val --- wqflask/wqflask/templates/test_correlation_page.html | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/templates/test_correlation_page.html b/wqflask/wqflask/templates/test_correlation_page.html index be9ec226..037e9735 100644 --- a/wqflask/wqflask/templates/test_correlation_page.html +++ b/wqflask/wqflask/templates/test_correlation_page.html @@ -59,10 +59,11 @@ index trait_name Sample r - Sample p(r) + Sample r(p) N - Tissue rho - Lit rho + Tissue r + Tissue r(p) + Lit r @@ -116,7 +117,8 @@ console.log(correlationResults) { "data": "corr_coeffient","defaultContent": "--"}, { "data": "p_value","defaultContent":"--"}, { "data": "num_overlap","defaultContent":"--"}, - {"data":"tissue_corr","defaultContent":"--","title":"Tissue rho"}, + {"data":"tissue_corr","defaultContent":"--","title":"Tissue r"}, + {"data":"tissue_p_val","defaultContent":"--","title":"Tissue r(p)"}, {"data":"lit_corr","defaultContent":"--","title":"Lit rho"} ], "columnDefs": [ -- cgit v1.2.3 From 02916a787b384709d96eebfaefd4898cae415739 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 1 May 2021 03:23:32 +0300 Subject: delete demo correlation page --- .../wqflask/templates/demo_correlation_page.html | 107 --------------------- 1 file changed, 107 deletions(-) delete mode 100644 wqflask/wqflask/templates/demo_correlation_page.html diff --git a/wqflask/wqflask/templates/demo_correlation_page.html b/wqflask/wqflask/templates/demo_correlation_page.html deleted file mode 100644 index 67e3c57c..00000000 --- a/wqflask/wqflask/templates/demo_correlation_page.html +++ /dev/null @@ -1,107 +0,0 @@ -{% extends "base.html" %} -{% block title %}Demo Correlation Results{% endblock %} -{% block css %} - - - - - - -{% endblock %} -{% block content %} -
-
CORRELATION RESULTS
-
-

Trait_Name

-

Rho value

-

Num overlap

-

P value

-
-
- {% for corr_result in correlation_results %} - {% for key,value in corr_result.items()%} -
-

{{key}}

- - {% if "corr_coeffient" in value %} -

{{value["corr_coeffient"]}}

- {%elif "tissue_corr" in value %} -

{{value["tissue_corr"]}}

- {%elif "lit_corr" in value %} - {{value["lit_corr"]}} - {% endif %} - {%if "tissue_number" in value %} -
{{value["tissue_number"]}}
- {%elif "num_overlap" in value %} -

{{value["num_overlap"]}}

- {% endif %} -

{{value["p_value"]}}

- - - - - - {% endfor %} -
- {% endfor %} -
- -
- - -{% endblock %} - -- cgit v1.2.3 From 149f9c7c6804d4e717ed9aa3a42968b295693b3d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sat, 1 May 2021 03:24:05 +0300 Subject: autopep8 for file --- wqflask/wqflask/correlation/correlation_gn3_api.py | 126 ++++++++------------- 1 file changed, 45 insertions(+), 81 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 3c21a850..b56c09d8 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -1,61 +1,55 @@ """module that calls the gn3 api's to do the correlation """ import json -import requests -import time + from wqflask.correlation import correlation_functions from base import data_set + from base.trait import create_trait from base.trait import retrieve_sample_data -# gn3 lib + from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import map_shared_keys_to_values -from gn3.computations.correlations import compute_all_tissue_correlation from gn3.computations.correlations import compute_all_lit_correlation from gn3.computations.correlations import experimental_compute_all_tissue_correlation from gn3.db_utils import database_connector -GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation" + +def create_target_this_trait(start_vars): + """this function creates the required trait and target dataset for correlation""" + + this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) + target_dataset = data_set.create_dataset( + dataset_name=start_vars['corr_dataset']) + this_trait = create_trait(dataset=this_dataset, + name=start_vars['trait_id']) + sample_data = () + return (this_dataset, this_trait, target_dataset, sample_data) def process_samples(start_vars, sample_names, excluded_samples=None): - """process samples method""" + """process samples""" sample_data = {} if not excluded_samples: excluded_samples = () - sample_vals_dict = json.loads(start_vars["sample_vals"]) - for sample in sample_names: if sample not in excluded_samples: val = sample_vals_dict[sample] if not val.strip().lower() == "x": sample_data[str(sample)] = float(val) - return sample_data -def create_target_this_trait(start_vars): - """this function creates the required trait and target dataset for correlation""" - - this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) - target_dataset = data_set.create_dataset( - dataset_name=start_vars['corr_dataset']) +def sample_for_trait_lists(corr_results, target_dataset, + this_trait, this_dataset, start_vars): + """interface function for correlation on top results""" - this_trait = create_trait(dataset=this_dataset, - name=start_vars['trait_id']) - - # target_dataset.get_trait_data(list(self.sample_data.keys())) - - # this_trait = retrieve_sample_data(this_trait, this_dataset) - sample_data = () - return (this_dataset, this_trait, target_dataset, sample_data) - - -def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_dataset, start_vars): sample_data = process_samples( start_vars, this_dataset.group.samplelist) target_dataset.get_trait_data(list(sample_data.keys())) + # should filter target traits from here + _corr_results = corr_results this_trait = retrieve_sample_data(this_trait, this_dataset) @@ -69,65 +63,55 @@ def sample_for_trait_lists(corr_results, target_dataset, this_trait, this_datase this_trait=this_trait_data, target_dataset=results) - return correlation_results -def tissue_for_trait_lists(corr_results, this_dataset, target_dataset, this_trait): - trait_lists = dict([(list(corr_result)[0], True) - for corr_result in corr_results]) +def tissue_for_trait_lists(corr_results, this_dataset, this_trait): + """interface function for doing tissue corr_results on trait_list""" + # trait_lists = dict([(list(corr_result)[0], True) + # for corr_result in corr_results]) + trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} traits_symbol_dict = this_dataset.retrieve_genes("Symbol") traits_symbol_dict = dict({trait_name: symbol for ( trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) primary_tissue_data, target_tissue_data = get_tissue_correlation_input( this_trait, traits_symbol_dict) corr_results = experimental_compute_all_tissue_correlation( - primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson") + primary_tissue_dict=primary_tissue_data, + target_tissues_data=target_tissue_data, + corr_method="pearson") return corr_results def compute_correlation(start_vars, method="pearson"): """compute correlation for to call gn3 api""" - import time + # pylint: disable-msg=too-many-locals corr_type = start_vars['corr_type'] (this_dataset, this_trait, target_dataset, sample_data) = create_target_this_trait(start_vars) - # cor_results = compute_correlation(start_vars) - method = start_vars['corr_sample_method'] - + _corr_return_results = start_vars.get("corr_return_results", 100) corr_input_data = {} if corr_type == "sample": - + sample_data = process_samples( start_vars, this_dataset.group.samplelist) - initial_time = time.time() target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) - print("Creating target dataset and trait took", time.time()-initial_time) - - this_trait_data = { "trait_sample_data": sample_data, "trait_id": start_vars["trait_id"] } - initial_time = time.time() results = map_shared_keys_to_values( target_dataset.samplelist, target_dataset.trait_data) correlation_results = compute_all_sample_correlation(corr_method=method, this_trait=this_trait_data, target_dataset=results) - print("doing sample correlation took", time.time()-initial_time) - # other_results = tissue_for_trait_lists( - # correlation_results, this_dataset, target_dataset, this_trait) - # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" - return correlation_results - elif corr_type == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") primary_tissue_data, target_tissue_data = get_tissue_correlation_input( @@ -137,50 +121,33 @@ def compute_correlation(start_vars, method="pearson"): "primary_tissue": primary_tissue_data, "target_tissues_dict": target_tissue_data } - initial_time = time.time() - correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], - target_tissues_data=corr_input_data[ - "target_tissues_dict"], - corr_method=method) - print("computing tissue took >>>>", time.time()-initial_time) - # sample_results = sample_for_trait_lists( - # correlation_results, target_dataset, this_trait, this_dataset, start_vars) - return correlation_results + correlation_results = experimental_compute_all_tissue_correlation( + primary_tissue_dict=corr_input_data["primary_tissue"], + target_tissues_data=corr_input_data[ + "target_tissues_dict"], + corr_method=method + + ) elif corr_type == "lit": (this_trait_geneid, geneid_dict, species) = do_lit_correlation( - this_trait, this_dataset, target_dataset) + this_trait, this_dataset) conn, _cursor_object = database_connector() - initial_time = time.time() with conn: - - lit_corr_results = compute_all_lit_correlation( + correlation_results = compute_all_lit_correlation( conn=conn, trait_lists=list(geneid_dict.items()), species=species, gene_id=this_trait_geneid) - return lit_corr_results - # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" - # corr_input_data = geneid_dict - # corr_results = requests.post(requests_url, json=corr_input_data) - - # data = corr_results.json() - - # return data + return correlation_results -def do_lit_correlation(this_trait, this_dataset, target_dataset): +def do_lit_correlation(this_trait, this_dataset): + """function for fetching lit inputs""" geneid_dict = this_dataset.retrieve_genes("GeneId") - # - print("CALLING THE LIT CORRELATION HERE") species = this_dataset.group.species.lower() - - this_trait_geneid = this_trait.geneid - this_trait_gene_data = { - this_trait.name: this_trait_geneid - } - - return (this_trait_geneid, geneid_dict, species) + trait_geneid = this_trait.geneid + return (trait_geneid, geneid_dict, species) def get_tissue_correlation_input(this_trait, trait_symbol_dict): @@ -190,7 +157,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( )] - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=list(trait_symbol_dict.values())) primary_tissue_data = { @@ -202,7 +168,5 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): "trait_symbol_dict": trait_symbol_dict, "symbol_tissue_vals_dict": corr_result_tissue_vals_dict } - return (primary_tissue_data, target_tissue_data) - return None -- cgit v1.2.3 From 913d2e9113635ccf53140d53aaad55f09fc1df26 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 3 May 2021 20:48:00 +0300 Subject: apply limit to for results --- wqflask/wqflask/correlation/correlation_gn3_api.py | 10 +++++----- wqflask/wqflask/views.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index b56c09d8..4949bbe2 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -11,7 +11,7 @@ from base.trait import retrieve_sample_data from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import map_shared_keys_to_values from gn3.computations.correlations import compute_all_lit_correlation -from gn3.computations.correlations import experimental_compute_all_tissue_correlation +from gn3.computations.correlations import compute_tissue_correlation from gn3.db_utils import database_connector @@ -76,7 +76,7 @@ def tissue_for_trait_lists(corr_results, this_dataset, this_trait): trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) primary_tissue_data, target_tissue_data = get_tissue_correlation_input( this_trait, traits_symbol_dict) - corr_results = experimental_compute_all_tissue_correlation( + corr_results = compute_tissue_correlation( primary_tissue_dict=primary_tissue_data, target_tissues_data=target_tissue_data, corr_method="pearson") @@ -93,7 +93,7 @@ def compute_correlation(start_vars, method="pearson"): sample_data) = create_target_this_trait(start_vars) method = start_vars['corr_sample_method'] - _corr_return_results = start_vars.get("corr_return_results", 100) + corr_return_results = int(start_vars.get("corr_return_results", 100)) corr_input_data = {} if corr_type == "sample": @@ -121,7 +121,7 @@ def compute_correlation(start_vars, method="pearson"): "primary_tissue": primary_tissue_data, "target_tissues_dict": target_tissue_data } - correlation_results = experimental_compute_all_tissue_correlation( + correlation_results = compute_tissue_correlation( primary_tissue_dict=corr_input_data["primary_tissue"], target_tissues_data=corr_input_data[ "target_tissues_dict"], @@ -139,7 +139,7 @@ def compute_correlation(start_vars, method="pearson"): conn=conn, trait_lists=list(geneid_dict.items()), species=species, gene_id=this_trait_geneid) - return correlation_results + return correlation_results[0:corr_return_results] def do_lit_correlation(this_trait, this_dataset): diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 3d4376e2..b042a211 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -885,7 +885,7 @@ def corr_compute_page(): initial_time = time.time() correlation_results = compute_correlation(request.form) print(">>>>Time taken by this endpoint",time.time()-initial_time) - return render_template("test_correlation_page.html",correlation_results=correlation_results[0:50]) + return render_template("test_correlation_page.html",correlation_results=correlation_results) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3 From 049a438741d19b1cf6c8c290ec01343b51690b30 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Sun, 9 May 2021 00:17:33 +0300 Subject: fix:add lit for trait list --- wqflask/wqflask/correlation/correlation_gn3_api.py | 39 ++++++++++++++++------ 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index 4949bbe2..d68bb604 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -83,6 +83,23 @@ def tissue_for_trait_lists(corr_results, this_dataset, this_trait): return corr_results +def lit_for_trait_list(corr_results, this_dataset, this_trait): + (this_trait_geneid, geneid_dict, species) = do_lit_correlation( + this_trait, this_dataset) + + trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + + geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict if + trait_lists.get(trait_name)} + + conn, _cursor_object = database_connector() + correlation_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) + + return correlation_results[0:corr_return_results] + + def compute_correlation(start_vars, method="pearson"): """compute correlation for to call gn3 api""" # pylint: disable-msg=too-many-locals @@ -136,30 +153,30 @@ def compute_correlation(start_vars, method="pearson"): conn, _cursor_object = database_connector() with conn: correlation_results = compute_all_lit_correlation( - conn=conn, trait_lists=list(geneid_dict.items()), - species=species, gene_id=this_trait_geneid) + conn = conn, trait_lists = list(geneid_dict.items()), + species = species, gene_id = this_trait_geneid) return correlation_results[0:corr_return_results] def do_lit_correlation(this_trait, this_dataset): """function for fetching lit inputs""" - geneid_dict = this_dataset.retrieve_genes("GeneId") - species = this_dataset.group.species.lower() - trait_geneid = this_trait.geneid + geneid_dict=this_dataset.retrieve_genes("GeneId") + species=this_dataset.group.species.lower() + trait_geneid=this_trait.geneid return (trait_geneid, geneid_dict, species) def get_tissue_correlation_input(this_trait, trait_symbol_dict): """Gets tissue expression values for the primary trait and target tissues values""" - primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=[this_trait.symbol]) + primary_trait_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list = [this_trait.symbol]) if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( + primary_trait_tissue_values=primary_trait_tissue_vals_dict[this_trait.symbol.lower( )] - corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list=list(trait_symbol_dict.values())) - primary_tissue_data = { + corr_result_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list = list(trait_symbol_dict.values())) + primary_tissue_data={ "this_id": this_trait.name, "tissue_values": primary_trait_tissue_values -- cgit v1.2.3 From 27538980f93c1d72b0b2d76151312f3fbce4c9a5 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 10 May 2021 08:24:42 +0300 Subject: add previous endpoint for correlation --- wqflask/base/data_set.py | 37 ------------------------------------- wqflask/wqflask/views.py | 13 ++++++++----- 2 files changed, 8 insertions(+), 42 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 468c4da0..d0f5e6f2 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -655,42 +655,7 @@ class DataSet(object): "Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - def fetch_probe_trait_data(self, sample_list=None): - if sample_list: - self.samplelist = sample_list - else: - self.samplelist = self.group.samplelist - - if self.group.parlist != None and self.group.f1list != None: - if (self.group.parlist + self.group.f1list) in self.samplelist: - self.samplelist += self.group.parlist + self.group.f1list - - query = """ - SELECT Strain.Name, Strain.Id FROM Strain, Species - WHERE Strain.Name IN {} - and Strain.SpeciesId=Species.Id - and Species.name = '{}' - """.format(create_in_clause(self.samplelist), *mescape(self.group.species)) - logger.sql(query) - results = dict(g.db.execute(query).fetchall()) - sample_ids = [results[item] for item in self.samplelist] - - query = """SELECT * from ProbeSetData WHERE Id in ( SELECT ProbeSetXRef.DataId FROM (ProbeSet, ProbeSetXRef, ProbeSetFreeze) WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id and ProbeSetFreeze.Name = 'HC_M2_0606_P' and ProbeSet.Id = ProbeSetXRef.ProbeSetId order by ProbeSet.Id ) and StrainId in ({})""".format( - ",".join(str(sample_id) for sample_id in sample_ids)) - results = g.db.execute(query).fetchall() - - # with conn: - # cursor = conn.cursor() - # cursor.execute(query) - # results = cursor.fetchall() - trait_data = {} - for trait_id, StrainId, value in results: - if trait_id in trait_data: - trait_data[trait_id].append(value) - else: - trait_data[trait_id] = [value] - self.trait_data = trait_data def get_trait_data(self, sample_list=None): if sample_list: @@ -711,7 +676,6 @@ class DataSet(object): logger.sql(query) results = dict(g.db.execute(query).fetchall()) sample_ids = [results[item] for item in self.samplelist] - print("the number of sample ids are", len(sample_ids)) # MySQL limits the number of tables that can be used in a join to 61, # so we break the sample ids into smaller chunks @@ -762,7 +726,6 @@ class DataSet(object): trait_sample_data.append(results) trait_count = len(trait_sample_data[0]) - print("the trait count is >>>", trait_count) self.trait_data = collections.defaultdict(list) # put all of the separate data together into a dictionary where the keys are diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index b042a211..19779651 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -881,11 +881,14 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - import time - initial_time = time.time() - correlation_results = compute_correlation(request.form) - print(">>>>Time taken by this endpoint",time.time()-initial_time) - return render_template("test_correlation_page.html",correlation_results=correlation_results) + template_vars = show_corr_results.CorrelationResults(request.form) + return render_template("correlation_page.html", **template_vars.__dict__) + + # to test the new correlation api uncomment these lines + + # correlation_results = compute_correlation(request.form) + # print(">>>>Time taken by this endpoint",time.time()-initial_time) + # return render_template("test_correlation_page.html",correlation_results=correlation_results) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3 From aebbbc9240ab63f684dae2451cfc335681c049db Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 10 May 2021 08:25:21 +0300 Subject: remove debug statements --- wqflask/wqflask/correlation/show_corr_results.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index 50b3ba26..7f69807b 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -57,6 +57,7 @@ TISSUE_METHODS = [METHOD_TISSUE_PEARSON, METHOD_TISSUE_RANK] TISSUE_MOUSE_DB = 1 + class CorrelationResults(object): def __init__(self, start_vars): # get trait list from db (database name) @@ -445,9 +446,6 @@ class CorrelationResults(object): """ - print("below here>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") - print(self.target_dataset.trait_data) - self.this_trait_vals = [] target_vals = [] for index, sample in enumerate(self.target_dataset.samplelist): -- cgit v1.2.3 From bc29c9dd907c8c1b024231fa713040b3c4092f3a Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 10 May 2021 08:29:46 +0300 Subject: revert python path --- bin/genenetwork2 | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/genenetwork2 b/bin/genenetwork2 index f73f235c..5f4e0f9a 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -154,8 +154,7 @@ if [ ! -d $R_LIBS_SITE ] ; then fi # We may change this one: -# export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH -PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$HOME/project/genenetwork3:$PYTHONPATH +export PYTHONPATH=$PYTHON_GN_PATH:$GN2_BASE_DIR/wqflask:$PYTHONPATH # Our UNIX TMPDIR defaults to /tmp - change this on a shared server if [ -z $TMPDIR ]; then -- cgit v1.2.3 From 93baf59aca12fb85e668315e67d36137c854f12d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 10 May 2021 08:33:22 +0300 Subject: minor changes to interface --- wqflask/wqflask/correlation/correlation_gn3_api.py | 60 ++++++++++++++-------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index d68bb604..98d52591 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -68,9 +68,9 @@ def sample_for_trait_lists(corr_results, target_dataset, def tissue_for_trait_lists(corr_results, this_dataset, this_trait): """interface function for doing tissue corr_results on trait_list""" - # trait_lists = dict([(list(corr_result)[0], True) - # for corr_result in corr_results]) - trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) + # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} traits_symbol_dict = this_dataset.retrieve_genes("Symbol") traits_symbol_dict = dict({trait_name: symbol for ( trait_name, symbol) in traits_symbol_dict.items() if trait_lists.get(trait_name)}) @@ -85,19 +85,24 @@ def tissue_for_trait_lists(corr_results, this_dataset, this_trait): def lit_for_trait_list(corr_results, this_dataset, this_trait): (this_trait_geneid, geneid_dict, species) = do_lit_correlation( - this_trait, this_dataset) + this_trait, this_dataset) - trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + # trait_lists = {list(corr_results)[0]: 1 for corr_result in corr_results} + trait_lists = dict([(list(corr_result)[0], True) + for corr_result in corr_results]) - geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict if - trait_lists.get(trait_name)} + geneid_dict = {trait_name: geneid for (trait_name, geneid) in geneid_dict.items() if + trait_lists.get(trait_name)} conn, _cursor_object = database_connector() - correlation_results = compute_all_lit_correlation( - conn=conn, trait_lists=list(geneid_dict.items()), - species=species, gene_id=this_trait_geneid) - return correlation_results[0:corr_return_results] + with conn: + + correlation_results = compute_all_lit_correlation( + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) + + return correlation_results def compute_correlation(start_vars, method="pearson"): @@ -129,6 +134,17 @@ def compute_correlation(start_vars, method="pearson"): this_trait=this_trait_data, target_dataset=results) + # do tissue correaltion + + # code to be use later + + # tissue_result = tissue_for_trait_lists( + # correlation_results, this_dataset, this_trait) + # # lit spoils the party so slow + # lit_result = lit_for_trait_list( + # correlation_results, this_dataset, this_trait) + + elif corr_type == "tissue": trait_symbol_dict = this_dataset.retrieve_genes("Symbol") primary_tissue_data, target_tissue_data = get_tissue_correlation_input( @@ -153,30 +169,30 @@ def compute_correlation(start_vars, method="pearson"): conn, _cursor_object = database_connector() with conn: correlation_results = compute_all_lit_correlation( - conn = conn, trait_lists = list(geneid_dict.items()), - species = species, gene_id = this_trait_geneid) + conn=conn, trait_lists=list(geneid_dict.items()), + species=species, gene_id=this_trait_geneid) return correlation_results[0:corr_return_results] def do_lit_correlation(this_trait, this_dataset): """function for fetching lit inputs""" - geneid_dict=this_dataset.retrieve_genes("GeneId") - species=this_dataset.group.species.lower() - trait_geneid=this_trait.geneid + geneid_dict = this_dataset.retrieve_genes("GeneId") + species = this_dataset.group.species.lower() + trait_geneid = this_trait.geneid return (trait_geneid, geneid_dict, species) def get_tissue_correlation_input(this_trait, trait_symbol_dict): """Gets tissue expression values for the primary trait and target tissues values""" - primary_trait_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = [this_trait.symbol]) + primary_trait_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=[this_trait.symbol]) if this_trait.symbol.lower() in primary_trait_tissue_vals_dict: - primary_trait_tissue_values=primary_trait_tissue_vals_dict[this_trait.symbol.lower( + primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( )] - corr_result_tissue_vals_dict=correlation_functions.get_trait_symbol_and_tissue_values( - symbol_list = list(trait_symbol_dict.values())) - primary_tissue_data={ + corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( + symbol_list=list(trait_symbol_dict.values())) + primary_tissue_data = { "this_id": this_trait.name, "tissue_values": primary_trait_tissue_values -- cgit v1.2.3 From 0b723720f7b1b9802b2f5453b747c7e48b693817 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 10 May 2021 08:34:10 +0300 Subject: use new correlation code in endpoint --- wqflask/wqflask/views.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 19779651..a470a0c9 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -881,14 +881,14 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - template_vars = show_corr_results.CorrelationResults(request.form) - return render_template("correlation_page.html", **template_vars.__dict__) + # template_vars = show_corr_results.CorrelationResults(request.form) + # return render_template("correlation_page.html", **template_vars.__dict__) # to test the new correlation api uncomment these lines - # correlation_results = compute_correlation(request.form) + correlation_results = compute_correlation(request.form) # print(">>>>Time taken by this endpoint",time.time()-initial_time) - # return render_template("test_correlation_page.html",correlation_results=correlation_results) + return render_template("test_correlation_page.html",correlation_results=correlation_results) @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): -- cgit v1.2.3 From 848fc236461384e5352798e19d710480ead379c4 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Mon, 10 May 2021 08:50:20 +0300 Subject: minor-fix:remove debug statements --- wqflask/wqflask/views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 317c5f63..c2e403a0 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -951,10 +951,9 @@ def corr_compute_page(): # template_vars = show_corr_results.CorrelationResults(request.form) # return render_template("correlation_page.html", **template_vars.__dict__) - # to test the new correlation api uncomment these lines + # to test/disable the new correlation api uncomment these lines correlation_results = compute_correlation(request.form) - # print(">>>>Time taken by this endpoint",time.time()-initial_time) return render_template("test_correlation_page.html",correlation_results=correlation_results) -- cgit v1.2.3 From 56fc1a2a53496a8b3720515f61e54a74cc95821e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 11 May 2021 00:29:33 +0300 Subject: modify js & add button for test correlation --- wqflask/wqflask/static/new/javascript/show_trait.js | 10 ++++++++++ .../templates/show_trait_calculate_correlations.html | 3 +++ wqflask/wqflask/views.py | 15 ++++++++++----- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/wqflask/wqflask/static/new/javascript/show_trait.js b/wqflask/wqflask/static/new/javascript/show_trait.js index 6e9d68c4..569046d3 100644 --- a/wqflask/wqflask/static/new/javascript/show_trait.js +++ b/wqflask/wqflask/static/new/javascript/show_trait.js @@ -585,6 +585,16 @@ get_table_contents_for_form_submit = function(form_id) { var corr_input_list = ['sample_vals', 'corr_type', 'primary_samples', 'trait_id', 'dataset', 'group', 'tool_used', 'form_url', 'corr_sample_method', 'corr_samples_group', 'corr_dataset', 'min_expr', 'corr_return_results', 'location_type', 'loc_chr', 'min_loc_mb', 'max_loc_mb', 'p_range_lower', 'p_range_upper'] +$(".test_corr_compute").on("click", (function(_this) { + return function() { + $('input[name=tool_used]').val("Correlation"); + $('input[name=form_url]').val("/test_corr_compute"); + $('input[name=wanted_inputs]').val(corr_input_list.join(",")); + url = "/loading"; + return submit_special(url); + }; +})(this)); + $(".corr_compute").on("click", (function(_this) { return function() { $('input[name=tool_used]').val("Correlation"); diff --git a/wqflask/wqflask/templates/show_trait_calculate_correlations.html b/wqflask/wqflask/templates/show_trait_calculate_correlations.html index 50803978..ef784c84 100644 --- a/wqflask/wqflask/templates/show_trait_calculate_correlations.html +++ b/wqflask/wqflask/templates/show_trait_calculate_correlations.html @@ -117,6 +117,9 @@
+
+ +
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index c2e403a0..f3b0257c 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -705,7 +705,7 @@ def mapping_results_container_page(): @app.route("/loading", methods=('POST',)) def loading_page(): - logger.info(request.url) + # logger.info(request.url) initial_start_vars = request.form start_vars_container = {} n_samples = 0 # ZS: So it can be displayed on loading page @@ -948,15 +948,20 @@ def network_graph_page(): def corr_compute_page(): logger.info("In corr_compute, request.form is:", pf(request.form)) logger.info(request.url) - # template_vars = show_corr_results.CorrelationResults(request.form) - # return render_template("correlation_page.html", **template_vars.__dict__) + template_vars = show_corr_results.CorrelationResults(request.form) + return render_template("correlation_page.html", **template_vars.__dict__) # to test/disable the new correlation api uncomment these lines - correlation_results = compute_correlation(request.form) - return render_template("test_correlation_page.html",correlation_results=correlation_results) + # correlation_results = compute_correlation(request.form) + # return render_template("test_correlation_page.html", correlation_results=correlation_results) +@app.route("/test_corr_compute", methods=["POST"]) +def test_corr_compute_page(): + correlation_results = compute_correlation(request.form) + return render_template("test_correlation_page.html", correlation_results=correlation_results) + @app.route("/corr_matrix", methods=('POST',)) def corr_matrix_page(): logger.info("In corr_matrix, request.form is:", pf(request.form)) -- cgit v1.2.3 From e159fb1e0d9a9874be9a3475dd4a1a055d9204ad Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 11 Feb 2021 04:33:55 +0300 Subject: replace datatable scroller --- wqflask/wqflask/templates/mapping_results.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/templates/mapping_results.html b/wqflask/wqflask/templates/mapping_results.html index 73d7501b..d6fc6e37 100644 --- a/wqflask/wqflask/templates/mapping_results.html +++ b/wqflask/wqflask/templates/mapping_results.html @@ -357,7 +357,7 @@ {% endif %} - + -- cgit v1.2.3 From 92afd94c14891bec381b6de5ecf9926032bab908 Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 11 Feb 2021 04:37:11 +0300 Subject: replace datatable scroller cdn --- wqflask/wqflask/templates/show_trait.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 09ecb7b6..fc14822c 100644 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -148,7 +148,7 @@ - + -- cgit v1.2.3 From 4f826611242080089856ccb4e3a7cda398e57b0d Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Thu, 11 Feb 2021 04:46:20 +0300 Subject: replace font-awesome --- wqflask/wqflask/templates/correlation_page.html | 4 ++-- wqflask/wqflask/templates/search_result_page.html | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/wqflask/wqflask/templates/correlation_page.html b/wqflask/wqflask/templates/correlation_page.html index a9a3e1a0..4cad2749 100644 --- a/wqflask/wqflask/templates/correlation_page.html +++ b/wqflask/wqflask/templates/correlation_page.html @@ -4,7 +4,7 @@ - + {% endblock %} @@ -141,7 +141,7 @@ - + diff --git a/wqflask/wqflask/templates/search_result_page.html b/wqflask/wqflask/templates/search_result_page.html index e7a7bc51..827bad98 100644 --- a/wqflask/wqflask/templates/search_result_page.html +++ b/wqflask/wqflask/templates/search_result_page.html @@ -4,7 +4,7 @@ - + {% endblock %} @@ -150,7 +150,7 @@ - + -- cgit v1.2.3 From 45fc1da136b78bac906aad013686a9530f68bd5e Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 10:33:49 +0300 Subject: doc: docker-container: Default to python3-genenetwork2 for examples --- doc/docker-container.org | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/docker-container.org b/doc/docker-container.org index ec91824a..c894c4b5 100644 --- a/doc/docker-container.org +++ b/doc/docker-container.org @@ -59,11 +59,11 @@ RUN tar -xzf /tmp/gn2.tar.gz -C / && rm -f /tmp/gn2.tar.gz && \ Build the image(Note the fullstop at the end): -: sudo docker build -t python2-genenetwork2:latest -f Dockerfile . +: sudo docker build -t python3-genenetwork2:latest -f Dockerfile . To load the image interactively you've just created: -: docker run -ti "python2-genenetwork2:latest" bash +: docker run -ti "python3-genenetwork2:latest" bash Assuming you have a docker instance running, you could always run commands in it e.g: @@ -78,7 +78,7 @@ CI environment using Github Actions. To push to dockerhub, first get the image name by running =docker images=. Push to dockerhub using a command similar to: -: docker push bonfacekilz/python2-genenetwork2:latest +: docker push bonfacekilz/python3-genenetwork2:latest Right now, we have 2 images on DockerHub: -- cgit v1.2.3 From a3ea9cb840bc8dbc9e5e0940ec72a36712e13a2a Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 10:47:51 +0300 Subject: workflows: main.yml: Update container image --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a36abc0a..64c0fdb2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -11,7 +11,7 @@ on: jobs: unittest: runs-on: ubuntu-latest - container: bonfacekilz/python3-genenetwork2:ad741c1 + container: bonfacekilz/python3-genenetwork2:00ba1f8 steps: # First start with mariadb set then checkout. The checkout gives -- cgit v1.2.3 From 5933991c3a82e847ebf0a86583aa5461d8c3f937 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 11:06:22 +0300 Subject: links_scraper: genelinks: Remove unused import --- test/requests/links_scraper/genelinks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py index 12300f4a..13aee7c8 100644 --- a/test/requests/links_scraper/genelinks.py +++ b/test/requests/links_scraper/genelinks.py @@ -2,7 +2,6 @@ import re import requests import urllib3 import os -import logging from urllib.request import urlopen as uReq from bs4 import BeautifulSoup as soup -- cgit v1.2.3 From d32b7f3a877f33ca90af2a4206d8e0d5e04c36de Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 11:08:57 +0300 Subject: links_scraper: genelinks: Apply pep-8 formatting --- test/requests/links_scraper/genelinks.py | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/test/requests/links_scraper/genelinks.py b/test/requests/links_scraper/genelinks.py index 13aee7c8..52c13489 100644 --- a/test/requests/links_scraper/genelinks.py +++ b/test/requests/links_scraper/genelinks.py @@ -25,7 +25,6 @@ def search_templates(): parsed_page = soup( open(file_path, encoding="utf8"), "html.parser") html_parsed_pages.append(parsed_page) - return html_parsed_pages @@ -33,7 +32,7 @@ def is_valid_link(url_link): try: result = urlparse(url_link) return all([result.scheme, result.netloc, result.path]) - except Exception as e: + except Exception: return False @@ -41,13 +40,10 @@ def test_link(link): print(f'Checking -->{link}') results = None try: - results = requests.get(link, verify=False, timeout=10) status_code = results.status_code - - except Exception as e: + except Exception: status_code = 408 - return int(status_code) > 403 @@ -55,14 +51,11 @@ def fetch_css_links(parsed_page): print("fetching css links") for link in parsed_page.findAll("link"): full_path = None - link_url = link.attrs.get("href") if is_valid_link(link_url): full_path = link_url - elif re.match(r"^/css", link_url) or re.match(r"^/js", link_url): full_path = urljoin('http://localhost:5004/', link_url) - if full_path is not None: if test_link(full_path): BROKEN_LINKS.add(full_path) @@ -70,16 +63,13 @@ def fetch_css_links(parsed_page): def fetch_html_links(parsed_page): print("fetching a tags ") - for link in parsed_page.findAll("a"): full_path = None link_url = link.attrs.get("href") if re.match(r"^/", link_url): full_path = urljoin('http://localhost:5004/', link_url) - elif is_valid_link(link_url): full_path = link_url - if full_path is not None: if test_link(full_path): BROKEN_LINKS.add(full_path) @@ -91,8 +81,11 @@ def fetch_script_tags(parsed_page): js_link = link.attrs.get("src") if js_link is not None: if is_valid_link(js_link): - raise SystemExit("Failed,the library should be packaged in guix.\ - Please contact,http://genenetwork.org/ for more details") + raise SystemExit("Failed,the library should be " + "packaged in guix. " + "Please contact, " + "http://genenetwork.org/ " + "for more details") elif re.match(r"^/css", js_link) or re.match(r"^/js", js_link): full_path = urljoin('http://localhost:5004/', js_link) @@ -101,11 +94,9 @@ def fetch_script_tags(parsed_page): def fetch_page_links(page_url): - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) html_page = uReq(page_url) parsed_page = soup(html_page, "html.parser") - fetch_script_tags(parsed_page=parsed_page) fetch_css_links(parsed_page=parsed_page) fetch_html_links(parsed_page=parsed_page) @@ -113,13 +104,10 @@ def fetch_page_links(page_url): def webpages_to_check(): pages = [f"http://localhost:{PORT}/"] - return pages if __name__ == '__main__': - # results = search_templates() - for page in webpages_to_check(): fetch_page_links(page) if len(BROKEN_LINKS) > 0: @@ -129,4 +117,5 @@ if __name__ == '__main__': if len(BROKEN_LINKS) > 0: raise SystemExit( - "The links Above are broken.Please contact genenetwork.org<<<<<<<<") + "The links Above are broken. " + "Please contact genenetwork.org<<<<<<<<") -- cgit v1.2.3 From 5e3e8e5847fe994652e3c6675ce96312fd9dc16a Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 11:09:36 +0300 Subject: workflows: main.yml: Remove hard-coded path --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 64c0fdb2..2b7c3b16 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -57,10 +57,10 @@ jobs: - name: Test for Broken Links run: | - env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp\ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py -c /__w/genenetwork2/genenetwork2/test/requests/links_scraper/genelinks.py + etc/default_settings.py -c \ + $PWD/test/requests/links_scraper/genelinks.py -- cgit v1.2.3 From 232148b1304ae531df6d9157f6d574d5c944830e Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 11:30:53 +0300 Subject: workflows: main.yml: Use screen to run gn2 --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2b7c3b16..f9a3fa13 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -49,11 +49,11 @@ jobs: - name: Start Genenetwork as a Background Task run: | - env GN2_PROFILE=/gn2-profile \ + /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py& + etc/default_settings.py" - name: Test for Broken Links run: | -- cgit v1.2.3 From 4538c0ad693ae94ebda5f0bf39678d776c0c8297 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 11:47:20 +0300 Subject: workflows: main.yml: Set up gn2 before running the unit tests --- .github/workflows/main.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f9a3fa13..b09f2f1d 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -39,21 +39,21 @@ jobs: mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';" mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;" - - name: Run the unit tests + - name: Start Genenetwork as a Background Task run: | - env GN2_PROFILE=/gn2-profile \ + /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py -c -m unittest discover -v + etc/default_settings.py" - - name: Start Genenetwork as a Background Task + - name: Run the unit tests run: | - /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \ + env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ - etc/default_settings.py" + etc/default_settings.py -c -m unittest discover -v - name: Test for Broken Links run: | -- cgit v1.2.3 From 75b378f757979af1d6fdaff3f3abde0599c5f744 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 12:10:37 +0300 Subject: workflows: main.yml: Use mariadb as a service --- .github/workflows/main.yml | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b09f2f1d..a2925e08 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,16 +12,19 @@ jobs: unittest: runs-on: ubuntu-latest container: bonfacekilz/python3-genenetwork2:00ba1f8 + services: + mysql: + image: mariadb:15.1 + env: + MYSQL_ALLOW_EMPTY_PASSWORD: yes + MYSQL_USER: gn2 + MYSQL_PASSWORD: mysql_password + MYSQL_DATABASE: db_webqtl_s + ports: + - 3306:3306 + options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 steps: - # First start with mariadb set then checkout. The checkout gives - # the mysqld enough time to start - - name: Set up mariadb - run: | - mysql_install_db --user=mysql --datadir=/usr/local/mysql - # Wait for the mysqld_safe process to start - mysqld_safe --user=mysql --datadir=/usr/local/mysql & - # Use v1 of checkout since v2 fails - name: Checkout Project uses: actions/checkout@v1 @@ -31,17 +34,10 @@ jobs: run: | /gn2-profile/bin/screen -dmLS redisconn /gn2-profile/bin/redis-server - # Redis is required by some of the tests 6379 - - name: Bootstrap tables - run: | - mysql -u root -e "SHOW DATABASES;" - mysql -u root -e "CREATE DATABASE db_webqtl_s;" - mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';" - mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;" - - name: Start Genenetwork as a Background Task run: | - /gn2-profile/bin/screen -dmL bash -c "env GN2_PROFILE=/gn2-profile \ + /gn2-profile/bin/screen -dmLS gn2conn + bash -c "env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ -- cgit v1.2.3 From c4ec08c1b544c05cce1c1247b8bc1afb741643e4 Mon Sep 17 00:00:00 2001 From: BonfaceKilz Date: Tue, 11 May 2021 13:38:47 +0300 Subject: workflows: main.yml: Remove mysql service and update gn2 b/g task --- .github/workflows/main.yml | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a2925e08..c6b56a81 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,19 +12,16 @@ jobs: unittest: runs-on: ubuntu-latest container: bonfacekilz/python3-genenetwork2:00ba1f8 - services: - mysql: - image: mariadb:15.1 - env: - MYSQL_ALLOW_EMPTY_PASSWORD: yes - MYSQL_USER: gn2 - MYSQL_PASSWORD: mysql_password - MYSQL_DATABASE: db_webqtl_s - ports: - - 3306:3306 - options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3 steps: + # First start with mariadb set then checkout. The checkout gives + # the mysqld enough time to start + - name: Set up mariadb + run: | + mysql_install_db --user=mysql --datadir=/usr/local/mysql + # Wait for the mysqld_safe process to start + mysqld_safe --user=mysql --datadir=/usr/local/mysql & + # Use v1 of checkout since v2 fails - name: Checkout Project uses: actions/checkout@v1 @@ -34,10 +31,17 @@ jobs: run: | /gn2-profile/bin/screen -dmLS redisconn /gn2-profile/bin/redis-server + # Initialise the tables + - name: Bootstrap tables + run: | + mysql -u root -e "SHOW DATABASES;" + mysql -u root -e "CREATE DATABASE db_webqtl_s;" + mysql -u root -e "CREATE USER 'gn2'@'localhost' IDENTIFIED BY 'mysql_password';" + mysql -u root -e "GRANT ALL PRIVILEGES ON *.* TO 'gn2'@'localhost';FLUSH PRIVILEGES;" + - name: Start Genenetwork as a Background Task run: | - /gn2-profile/bin/screen -dmLS gn2conn - bash -c "env GN2_PROFILE=/gn2-profile \ + /gn2-profile/bin/screen -dm bash -c "env GN2_PROFILE=/gn2-profile \ TMPDIR=/tmp SERVER_PORT=5004 \ WEBSERVER_MODE=DEBUG LOG_LEVEL=DEBUG \ GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ @@ -59,4 +63,3 @@ jobs: GENENETWORK_FILES=/genotype_files/ bin/genenetwork2 \ etc/default_settings.py -c \ $PWD/test/requests/links_scraper/genelinks.py - -- cgit v1.2.3