Increased gsearch speed by removing the unnecessary query for sample data

It is still slower than preferable because the gn1 database has to be queried for every trait in the search results.
author: zsloan 2016-03-02 21:18:54 +0000
committer: zsloan 2016-03-02 21:18:54 +0000
commit: e75b1a63047549685c38471ae2294996ad52af16 (patch)
tree: faf5ae6c5d84d4cce618fd8143278d60bc42226f /wqflask/base
parent: d38f4c6c2176d935bcb4f235fb4eee8b3ad703c2 (diff)
download: genenetwork2-e75b1a63047549685c38471ae2294996ad52af16.tar.gz
2 files changed, 23 insertions, 24 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 68a2a185..aaea7ec3 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -280,7 +280,7 @@ class DatasetGroup(object):
     """
     def __init__(self, dataset):
         """This sets self.group and self.group_id"""
-        print("DATASET NAME2:", dataset.name)
+        #print("DATASET NAME2:", dataset.name)
         self.name, self.id = g.db.execute(dataset.query_for_group).fetchone()
         if self.name == 'BXD300':
             self.name = "BXD"
@@ -366,7 +366,7 @@ class DatasetGroup(object):
                         break
 
                 if tissue_already_exists:
-                    print("dataset_menu:", dataset_menu[i]['datasets'])
+                    #print("dataset_menu:", dataset_menu[i]['datasets'])
                     dataset_menu[i]['datasets'].append((dataset, dataset_short))
                 else:
                     dataset_menu.append(dict(tissue=tissue_name,
@@ -392,18 +392,18 @@ class DatasetGroup(object):
 
     def get_samplelist(self):
         key = "samplelist:v2:" + self.name
-        print("key is:", key)
-        with Bench("Loading cache"):
-            result = Redis.get(key)
+        #print("key is:", key)
+        #with Bench("Loading cache"):
+        result = Redis.get(key)
 
         if result:
-            print("Sample List Cache hit!!!")
-            print("Before unjsonifying {}: {}".format(type(result), result))
+            #print("Sample List Cache hit!!!")
+            #print("Before unjsonifying {}: {}".format(type(result), result))
             self.samplelist = json.loads(result)
-            print("  type: ", type(self.samplelist))
-            print("  self.samplelist: ", self.samplelist)
+            #print("  type: ", type(self.samplelist))
+            #print("  self.samplelist: ", self.samplelist)
         else:
-            print("Cache not hit")
+            #print("Cache not hit")
 
             from utility.tools import plink_command
             PLINK_PATH,PLINK_COMMAND = plink_command()
@@ -417,7 +417,7 @@ class DatasetGroup(object):
                 self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
             else:
                 self.samplelist = None
-            print("after get_samplelist")
+            #print("after get_samplelist")
             Redis.set(key, json.dumps(self.samplelist))
             Redis.expire(key, 60*5)
 
@@ -442,9 +442,9 @@ class DatasetGroup(object):
         # reaper barfs on unicode filenames, so here we ensure it's a string
         full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
         if os.path.isfile(full_filename):
-            print("Reading file: ", full_filename)
+            #print("Reading file: ", full_filename)
             genotype_1.read(full_filename)
-            print("File read")
+            #print("File read")
         else:
             try:
                 full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno'))
@@ -521,8 +521,6 @@ class DataSet(object):
         self.group.get_samplelist()
         self.species = species.TheSpecies(self)
 
-        print("TESTING!!!")
-
 
     def get_desc(self):
         """Gets overridden later, at least for Temp...used by trait's get_given_name"""
@@ -700,7 +698,7 @@ class PhenotypeDataSet(DataSet):
 
     def setup(self):
         
-        print("IS A PHENOTYPEDATASET")
+        #print("IS A PHENOTYPEDATASET")
         
         # Fields in the database table
         self.search_fields = ['Phenotype.Post_publication_description',
@@ -1032,7 +1030,7 @@ class MrnaAssayDataSet(DataSet):
         #print("After get_trait_list query")
         trait_data = {}
         for trait in results:
-            print("Retrieving sample_data for ", trait[0])
+            #print("Retrieving sample_data for ", trait[0])
             trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
         #print("After retrieve_sample_data")
         return trait_data
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index ff80795c..eb9bc940 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -31,16 +31,16 @@ class GeneralTrait(object):
 
     """
 
-    def __init__(self, get_qtl_info=False, **kw):
+    def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
         # xor assertion
         assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
         if kw.get('dataset_name'):
             self.dataset = create_dataset(kw.get('dataset_name'))
-            print(" in GeneralTrait created dataset:", self.dataset)
+            #print(" in GeneralTrait created dataset:", self.dataset)
         else:
             self.dataset = kw.get('dataset')
         self.name = kw.get('name')                 # Trait ID, ProbeSet ID, Published ID, etc.
-        print("THE NAME IS:", self.name)
+        #print("THE NAME IS:", self.name)
         self.cellid = kw.get('cellid')
         self.identification = kw.get('identification', 'un-named trait')
         self.haveinfo = kw.get('haveinfo', False)
@@ -67,7 +67,8 @@ class GeneralTrait(object):
         # Todo: These two lines are necessary most of the time, but perhaps not all of the time
         # So we could add a simple if statement to short-circuit this if necessary
         self.retrieve_info(get_qtl_info=get_qtl_info)
-        self.retrieve_sample_data()
+        if kw.get('get_sample_info') != False:
+            self.retrieve_sample_data()
         
         
     def jsonable(self):
@@ -291,7 +292,7 @@ class GeneralTrait(object):
                             PublishFreeze.Id = %s
                     """ % (self.name, self.dataset.id)
             
-            print("query is:", query) 
+            #print("query is:", query) 
         
             trait_info = g.db.execute(query).fetchone()
         #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
@@ -402,9 +403,9 @@ class GeneralTrait(object):
                     #self.cursor.execute(query)
                     #trait_qtl = self.cursor.fetchone()
                     if trait_qtl:
-                        print("trait_qtl:", trait_qtl)
+                        #print("trait_qtl:", trait_qtl)
                         self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl
-                        print("self.locus:", self.locus)
+                        #print("self.locus:", self.locus)
                         if self.locus:
                             query = """
                                 select Geno.Chr, Geno.Mb from Geno, Species
author	zsloan	2016-03-02 21:18:54 +0000
committer	zsloan	2016-03-02 21:18:54 +0000
commit	e75b1a63047549685c38471ae2294996ad52af16 (patch)
tree	faf5ae6c5d84d4cce618fd8143278d60bc42226f /wqflask/base
parent	d38f4c6c2176d935bcb4f235fb4eee8b3ad703c2 (diff)
download	genenetwork2-e75b1a63047549685c38471ae2294996ad52af16.tar.gz