From 34e4933de5a1cd444abe618fcfd93b424bf3442e Mon Sep 17 00:00:00 2001 From: Alexander Kabui Date: Tue, 20 Apr 2021 01:38:26 +0300 Subject: refactor code for iterating mrna tissue data --- wqflask/base/mrna_assay_tissue_data.py | 39 +++++++++++++++++++--- .../wqflask/correlation/correlation_functions.py | 6 ++-- wqflask/wqflask/correlation/correlation_gn3_api.py | 24 +++++++------ 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py index f1929518..0220d73b 100644 --- a/wqflask/base/mrna_assay_tissue_data.py +++ b/wqflask/base/mrna_assay_tissue_data.py @@ -6,6 +6,7 @@ from utility import db_tools from utility import Bunch from utility.db_tools import escape +from gn3.db_utils import database_connector from utility.logger import getLogger @@ -44,16 +45,42 @@ class MrnaAssayTissueData(object): and t.Mean = x.maxmean; '''.format(in_clause) - results = g.db.execute(query).fetchall() - lower_symbols = [] + # lower_symbols = [] + lower_symbols = {} for gene_symbol in gene_symbols: + # lower_symbols[gene_symbol.lower()] = True if gene_symbol != None: - lower_symbols.append(gene_symbol.lower()) - + lower_symbols[gene_symbol.lower()] = True + + import time + # initial_time = time.time() + # conn,cursor = database_connector() + # cursor.execute(query) + # for result in cursor.fetchall(): + # symbol = result[0] + # self.data[symbol].gene_id = result[1] + # self.data[symbol].data_id = result[2] + # self.data[symbol].chr = result[3] + # self.data[symbol].mb = result[4] + # self.data[symbol].description = result[5] + # self.data[symbol].probe_target_description = result[6] + + + # print("my loop takes>>>>",time.time()-initial_time) + # conn.close() + # r + + # takes 5 seconds + initial_time = time.time() + results = list(g.db.execute(query).fetchall()) for result in results: symbol = result[0] - if symbol.lower() in lower_symbols: + # if symbol is not None + # exists = lower_symbols.get(symbol.lower()) + # if symbol.lower() in lower_symbols: + if symbol is not None and lower_symbols.get(symbol.lower()): + symbol = symbol.lower() self.data[symbol].gene_id = result.GeneId @@ -62,6 +89,7 @@ class MrnaAssayTissueData(object): self.data[symbol].mb = result.Mb self.data[symbol].description = result.description self.data[symbol].probe_target_description = result.Probe_Target_Description + print("time taken in the loop is",time.time()-initial_time) ########################################################################### #Input: cursor, symbolList (list), dataIdDict(Dict) @@ -82,6 +110,7 @@ class MrnaAssayTissueData(object): WHERE TissueProbeSetData.Id IN {} and TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list)) + results = g.db.execute(query).fetchall() for result in results: if result.Symbol.lower() not in symbol_values_dict: diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py index fd7691d4..af1d6060 100644 --- a/wqflask/wqflask/correlation/correlation_functions.py +++ b/wqflask/wqflask/correlation/correlation_functions.py @@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears def get_trait_symbol_and_tissue_values(symbol_list=None): tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list) - - if len(tissue_data.gene_symbols): - return tissue_data.get_symbol_values_pairs() + if len(tissue_data.gene_symbols) >0: + results = tissue_data.get_symbol_values_pairs() + return results diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py index ba606b92..e7394647 100644 --- a/wqflask/wqflask/correlation/correlation_gn3_api.py +++ b/wqflask/wqflask/correlation/correlation_gn3_api.py @@ -12,6 +12,7 @@ from gn3.computations.correlations import compute_all_sample_correlation from gn3.computations.correlations import map_shared_keys_to_values from gn3.computations.correlations import compute_all_tissue_correlation from gn3.computations.correlations import compute_all_lit_correlation +from gn3.computations.correlations import experimental_compute_all_tissue_correlation from gn3.db_utils import database_connector GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation" @@ -37,7 +38,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None): def create_target_this_trait(start_vars): """this function creates the required trait and target dataset for correlation""" - this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset']) target_dataset = data_set.create_dataset( dataset_name=start_vars['corr_dataset']) @@ -81,7 +81,7 @@ def compute_correlation(start_vars, method="pearson"): target_dataset.get_trait_data(list(sample_data.keys())) this_trait = retrieve_sample_data(this_trait, this_dataset) - print("Creating dataset and trait took",time.time()-initial_time) + print("Creating dataset and trait took", time.time()-initial_time) this_trait_data = { "trait_sample_data": sample_data, @@ -94,7 +94,7 @@ def compute_correlation(start_vars, method="pearson"): this_trait=this_trait_data, target_dataset=results) - print("doing sample correlation took",time.time()-initial_time) + print("doing sample correlation took", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}" return correlation_results @@ -109,11 +109,16 @@ def compute_correlation(start_vars, method="pearson"): "target_tissues_dict": target_tissue_data } initial_time = time.time() - correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], - target_tissues_data=corr_input_data["target_tissues_dict"], - corr_method=method) - print("time taken for compute tissue is",time.time()-initial_time) - + correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], + target_tissues_data=corr_input_data[ + "target_tissues_dict"], + corr_method=method) + print("correlation y took", time.time()-initial_time) + # initial_time = time.time() + # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"], + # target_tissues_data=corr_input_data["target_tissues_dict"], + # corr_method=method) + # print("time taken for compute tissue is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}" return correlation_results @@ -131,7 +136,7 @@ def compute_correlation(start_vars, method="pearson"): species=species, gene_id=this_trait_geneid) return lit_corr_results - print("the time taken is",time.time()-initial_time) + print("the time taken is", time.time()-initial_time) # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}" # corr_input_data = geneid_dict # corr_results = requests.post(requests_url, json=corr_input_data) @@ -161,7 +166,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict): primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower( )] - time_to_to_fetch_all = time.time() corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values( symbol_list=list(trait_symbol_dict.values())) primary_tissue_data = { -- cgit v1.2.3