about summary refs log tree commit diff
diff options
context:
space:
mode:
author Alexander Kabui 2021-04-20 01:38:26 +0300
committer Alexander Kabui 2021-04-20 01:38:26 +0300
commit 34e4933de5a1cd444abe618fcfd93b424bf3442e (patch)
tree a623ba0663e71d86447b660948401ee16989433e
parent 50c0ee93a59eecd40a6fbd19139671c94003c21b (diff)
download genenetwork2-34e4933de5a1cd444abe618fcfd93b424bf3442e.tar.gz
refactor code for iterating mrna tissue data
-rw-r--r-- wqflask/base/mrna_assay_tissue_data.py 39
-rw-r--r-- wqflask/wqflask/correlation/correlation_functions.py 6
-rw-r--r-- wqflask/wqflask/correlation/correlation_gn3_api.py 24
3 files changed, 51 insertions, 18 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index f1929518..0220d73b 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -6,6 +6,7 @@ from utility import db_tools
from utility import Bunch
from utility.db_tools import escape
+from gn3.db_utils import database_connector
from utility.logger import getLogger
@@ -44,16 +45,42 @@ class MrnaAssayTissueData(object):
and t.Mean = x.maxmean;
'''.format(in_clause)
- results = g.db.execute(query).fetchall()
- lower_symbols = []
+ # lower_symbols = []
+ lower_symbols = {}
for gene_symbol in gene_symbols:
+ # lower_symbols[gene_symbol.lower()] = True
if gene_symbol != None:
- lower_symbols.append(gene_symbol.lower())
-
+ lower_symbols[gene_symbol.lower()] = True
+
+ import time
+ # initial_time = time.time()
+ # conn,cursor = database_connector()
+ # cursor.execute(query)
+ # for result in cursor.fetchall():
+ # symbol = result[0]
+ # self.data[symbol].gene_id = result[1]
+ # self.data[symbol].data_id = result[2]
+ # self.data[symbol].chr = result[3]
+ # self.data[symbol].mb = result[4]
+ # self.data[symbol].description = result[5]
+ # self.data[symbol].probe_target_description = result[6]
+
+
+ # print("my loop takes>>>>",time.time()-initial_time)
+ # conn.close()
+ # r
+
+ # takes 5 seconds
+ initial_time = time.time()
+ results = list(g.db.execute(query).fetchall())
for result in results:
symbol = result[0]
- if symbol.lower() in lower_symbols:
+ # if symbol is not None
+ # exists = lower_symbols.get(symbol.lower())
+ # if symbol.lower() in lower_symbols:
+ if symbol is not None and lower_symbols.get(symbol.lower()):
+
symbol = symbol.lower()
self.data[symbol].gene_id = result.GeneId
@@ -62,6 +89,7 @@ class MrnaAssayTissueData(object):
self.data[symbol].mb = result.Mb
self.data[symbol].description = result.description
self.data[symbol].probe_target_description = result.Probe_Target_Description
+ print("time taken in the loop is",time.time()-initial_time)
###########################################################################
#Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -82,6 +110,7 @@ class MrnaAssayTissueData(object):
WHERE TissueProbeSetData.Id IN {} and
TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
+
results = g.db.execute(query).fetchall()
for result in results:
if result.Symbol.lower() not in symbol_values_dict:
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index fd7691d4..af1d6060 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears
def get_trait_symbol_and_tissue_values(symbol_list=None):
tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
-
- if len(tissue_data.gene_symbols):
- return tissue_data.get_symbol_values_pairs()
+ if len(tissue_data.gene_symbols) >0:
+ results = tissue_data.get_symbol_values_pairs()
+ return results
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index ba606b92..e7394647 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -12,6 +12,7 @@ from gn3.computations.correlations import compute_all_sample_correlation
from gn3.computations.correlations import map_shared_keys_to_values
from gn3.computations.correlations import compute_all_tissue_correlation
from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.computations.correlations import experimental_compute_all_tissue_correlation
from gn3.db_utils import database_connector
GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -37,7 +38,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
def create_target_this_trait(start_vars):
"""this function creates the required trait and target dataset for correlation"""
-
this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
target_dataset = data_set.create_dataset(
dataset_name=start_vars['corr_dataset'])
@@ -81,7 +81,7 @@ def compute_correlation(start_vars, method="pearson"):
target_dataset.get_trait_data(list(sample_data.keys()))
this_trait = retrieve_sample_data(this_trait, this_dataset)
- print("Creating dataset and trait took",time.time()-initial_time)
+ print("Creating dataset and trait took", time.time()-initial_time)
this_trait_data = {
"trait_sample_data": sample_data,
@@ -94,7 +94,7 @@ def compute_correlation(start_vars, method="pearson"):
this_trait=this_trait_data,
target_dataset=results)
- print("doing sample correlation took",time.time()-initial_time)
+ print("doing sample correlation took", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
return correlation_results
@@ -109,11 +109,16 @@ def compute_correlation(start_vars, method="pearson"):
"target_tissues_dict": target_tissue_data
}
initial_time = time.time()
- correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
- target_tissues_data=corr_input_data["target_tissues_dict"],
- corr_method=method)
- print("time taken for compute tissue is",time.time()-initial_time)
-
+ correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ target_tissues_data=corr_input_data[
+ "target_tissues_dict"],
+ corr_method=method)
+ print("correlation y took", time.time()-initial_time)
+ # initial_time = time.time()
+ # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+ # target_tissues_data=corr_input_data["target_tissues_dict"],
+ # corr_method=method)
+ # print("time taken for compute tissue is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
return correlation_results
@@ -131,7 +136,7 @@ def compute_correlation(start_vars, method="pearson"):
species=species, gene_id=this_trait_geneid)
return lit_corr_results
- print("the time taken is",time.time()-initial_time)
+ print("the time taken is", time.time()-initial_time)
# requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
# corr_input_data = geneid_dict
# corr_results = requests.post(requests_url, json=corr_input_data)
@@ -161,7 +166,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
)]
- time_to_to_fetch_all = time.time()
corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
symbol_list=list(trait_symbol_dict.values()))
primary_tissue_data = {