about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Alexander Kabui, 2021-04-20 01:38:26 +0300
committer: Alexander Kabui, 2021-04-20 01:38:26 +0300
commit: 34e4933de5a1cd444abe618fcfd93b424bf3442e (patch)
tree: a623ba0663e71d86447b660948401ee16989433e
parent: 50c0ee93a59eecd40a6fbd19139671c94003c21b (diff)
download: genenetwork2-34e4933de5a1cd444abe618fcfd93b424bf3442e.tar.gz
refactor code for iterating mrna tissue data
-rw-r--r-- wqflask/base/mrna_assay_tissue_data.py 39
-rw-r--r-- wqflask/wqflask/correlation/correlation_functions.py 6
-rw-r--r-- wqflask/wqflask/correlation/correlation_gn3_api.py 24
3 files changed, 51 insertions, 18 deletions
diff --git a/wqflask/base/mrna_assay_tissue_data.py b/wqflask/base/mrna_assay_tissue_data.py
index f1929518..0220d73b 100644
--- a/wqflask/base/mrna_assay_tissue_data.py
+++ b/wqflask/base/mrna_assay_tissue_data.py
@@ -6,6 +6,7 @@ from utility import db_tools
 from utility import Bunch
 
 from utility.db_tools import escape
+from gn3.db_utils import database_connector
 
 
 from utility.logger import getLogger
@@ -44,16 +45,42 @@ class MrnaAssayTissueData(object):
                 and t.Mean = x.maxmean;
                     '''.format(in_clause)
 
-        results = g.db.execute(query).fetchall()
 
-        lower_symbols = []
+        # lower_symbols = []
+        lower_symbols = {}
         for gene_symbol in gene_symbols:
+            # lower_symbols[gene_symbol.lower()] = True
             if gene_symbol != None:
-                lower_symbols.append(gene_symbol.lower())
-
+                lower_symbols[gene_symbol.lower()] = True
+
+        import time
+        # initial_time = time.time()
+        # conn,cursor = database_connector()
+        # cursor.execute(query)
+        # for result in cursor.fetchall():
+        #     symbol = result[0]
+        #     self.data[symbol].gene_id = result[1]
+        #     self.data[symbol].data_id = result[2]
+        #     self.data[symbol].chr = result[3]
+        #     self.data[symbol].mb = result[4]
+        #     self.data[symbol].description = result[5]
+        #     self.data[symbol].probe_target_description = result[6]
+
+
+        # print("my loop takes>>>>",time.time()-initial_time)
+        # conn.close()
+        # r
+
+        # takes 5 seconds
+        initial_time = time.time()
+        results = list(g.db.execute(query).fetchall())
         for result in results:
             symbol = result[0]
-            if symbol.lower() in lower_symbols:
+            # if  symbol  is not None
+            # exists = lower_symbols.get(symbol.lower())
+            # if symbol.lower() in lower_symbols:
+            if symbol  is not None and lower_symbols.get(symbol.lower()):
+
                 symbol = symbol.lower()
 
                 self.data[symbol].gene_id = result.GeneId
@@ -62,6 +89,7 @@ class MrnaAssayTissueData(object):
                 self.data[symbol].mb = result.Mb
                 self.data[symbol].description = result.description
                 self.data[symbol].probe_target_description = result.Probe_Target_Description
+        print("time taken in the loop is",time.time()-initial_time)
 
     ###########################################################################
     #Input: cursor, symbolList (list), dataIdDict(Dict)
@@ -82,6 +110,7 @@ class MrnaAssayTissueData(object):
                        WHERE TissueProbeSetData.Id IN {} and
                              TissueProbeSetXRef.DataId = TissueProbeSetData.Id""".format(db_tools.create_in_clause(id_list))
 
+
             results = g.db.execute(query).fetchall()
             for result in results:
                 if result.Symbol.lower() not in symbol_values_dict:
diff --git a/wqflask/wqflask/correlation/correlation_functions.py b/wqflask/wqflask/correlation/correlation_functions.py
index fd7691d4..af1d6060 100644
--- a/wqflask/wqflask/correlation/correlation_functions.py
+++ b/wqflask/wqflask/correlation/correlation_functions.py
@@ -82,6 +82,6 @@ def cal_zero_order_corr_for_tiss (primaryValue=[], targetValue=[], method='pears
 
 def get_trait_symbol_and_tissue_values(symbol_list=None):
     tissue_data = MrnaAssayTissueData(gene_symbols=symbol_list)
-
-    if len(tissue_data.gene_symbols):
-        return tissue_data.get_symbol_values_pairs()
+    if len(tissue_data.gene_symbols) >0:
+        results = tissue_data.get_symbol_values_pairs()
+        return results 
diff --git a/wqflask/wqflask/correlation/correlation_gn3_api.py b/wqflask/wqflask/correlation/correlation_gn3_api.py
index ba606b92..e7394647 100644
--- a/wqflask/wqflask/correlation/correlation_gn3_api.py
+++ b/wqflask/wqflask/correlation/correlation_gn3_api.py
@@ -12,6 +12,7 @@ from gn3.computations.correlations import compute_all_sample_correlation
 from gn3.computations.correlations import map_shared_keys_to_values
 from gn3.computations.correlations import compute_all_tissue_correlation
 from gn3.computations.correlations import compute_all_lit_correlation
+from gn3.computations.correlations import experimental_compute_all_tissue_correlation
 from gn3.db_utils import database_connector
 
 GN3_CORRELATION_API = "http://127.0.0.1:8202/api/correlation"
@@ -37,7 +38,6 @@ def process_samples(start_vars, sample_names, excluded_samples=None):
 def create_target_this_trait(start_vars):
     """this function creates the required trait and target dataset for correlation"""
 
-
     this_dataset = data_set.create_dataset(dataset_name=start_vars['dataset'])
     target_dataset = data_set.create_dataset(
         dataset_name=start_vars['corr_dataset'])
@@ -81,7 +81,7 @@ def compute_correlation(start_vars, method="pearson"):
         target_dataset.get_trait_data(list(sample_data.keys()))
         this_trait = retrieve_sample_data(this_trait, this_dataset)
 
-        print("Creating dataset and trait took",time.time()-initial_time)
+        print("Creating dataset and trait took", time.time()-initial_time)
 
         this_trait_data = {
             "trait_sample_data": sample_data,
@@ -94,7 +94,7 @@ def compute_correlation(start_vars, method="pearson"):
                                                              this_trait=this_trait_data,
                                                              target_dataset=results)
 
-        print("doing sample correlation took",time.time()-initial_time)
+        print("doing sample correlation took", time.time()-initial_time)
 
         # requests_url = f"{GN3_CORRELATION_API}/sample_x/{method}"
         return correlation_results
@@ -109,11 +109,16 @@ def compute_correlation(start_vars, method="pearson"):
             "target_tissues_dict": target_tissue_data
         }
         initial_time = time.time()
-        correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
-                                                             target_tissues_data=corr_input_data["target_tissues_dict"],
-                                                             corr_method=method)
-        print("time taken for compute tissue is",time.time()-initial_time)
-
+        correlation_results = experimental_compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+                                                                          target_tissues_data=corr_input_data[
+            "target_tissues_dict"],
+            corr_method=method)
+        print("correlation y took", time.time()-initial_time)
+        # initial_time = time.time()
+        # correlation_results = compute_all_tissue_correlation(primary_tissue_dict=corr_input_data["primary_tissue"],
+        #                                                      target_tissues_data=corr_input_data["target_tissues_dict"],
+        #                                                      corr_method=method)
+        # print("time taken for compute tissue is", time.time()-initial_time)
 
         # requests_url = f"{GN3_CORRELATION_API}/tissue_corr/{method}"
         return correlation_results
@@ -131,7 +136,7 @@ def compute_correlation(start_vars, method="pearson"):
                 species=species, gene_id=this_trait_geneid)
 
         return lit_corr_results
-        print("the time taken is",time.time()-initial_time) 
+        print("the time taken is", time.time()-initial_time)
         # requests_url = f"{GN3_CORRELATION_API}/lit_corr/{species}/{this_trait_geneid}"
         # corr_input_data = geneid_dict
     # corr_results = requests.post(requests_url, json=corr_input_data)
@@ -161,7 +166,6 @@ def get_tissue_correlation_input(this_trait, trait_symbol_dict):
         primary_trait_tissue_values = primary_trait_tissue_vals_dict[this_trait.symbol.lower(
         )]
 
-        time_to_to_fetch_all = time.time()
         corr_result_tissue_vals_dict = correlation_functions.get_trait_symbol_and_tissue_values(
             symbol_list=list(trait_symbol_dict.values()))
         primary_tissue_data = {