From e75b1a63047549685c38471ae2294996ad52af16 Mon Sep 17 00:00:00 2001
From: zsloan
Date: Wed, 2 Mar 2016 21:18:54 +0000
Subject: Increased gsearch speed by removing unnecessary query for sample data
Still slower than preferable, because the gn1 database must be queried for every trait in the search results.
---
wqflask/base/data_set.py | 32 +++++++++++++++----------------
wqflask/base/trait.py | 15 ++++++++-------
wqflask/wqflask/gsearch.py | 24 ++++++++++++++---------
wqflask/wqflask/templates/show_trait.html | 15 ++-------------
4 files changed, 40 insertions(+), 46 deletions(-)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 68a2a185..aaea7ec3 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -280,7 +280,7 @@ class DatasetGroup(object):
"""
def __init__(self, dataset):
"""This sets self.group and self.group_id"""
- print("DATASET NAME2:", dataset.name)
+ #print("DATASET NAME2:", dataset.name)
self.name, self.id = g.db.execute(dataset.query_for_group).fetchone()
if self.name == 'BXD300':
self.name = "BXD"
@@ -366,7 +366,7 @@ class DatasetGroup(object):
break
if tissue_already_exists:
- print("dataset_menu:", dataset_menu[i]['datasets'])
+ #print("dataset_menu:", dataset_menu[i]['datasets'])
dataset_menu[i]['datasets'].append((dataset, dataset_short))
else:
dataset_menu.append(dict(tissue=tissue_name,
@@ -392,18 +392,18 @@ class DatasetGroup(object):
def get_samplelist(self):
key = "samplelist:v2:" + self.name
- print("key is:", key)
- with Bench("Loading cache"):
- result = Redis.get(key)
+ #print("key is:", key)
+ #with Bench("Loading cache"):
+ result = Redis.get(key)
if result:
- print("Sample List Cache hit!!!")
- print("Before unjsonifying {}: {}".format(type(result), result))
+ #print("Sample List Cache hit!!!")
+ #print("Before unjsonifying {}: {}".format(type(result), result))
self.samplelist = json.loads(result)
- print(" type: ", type(self.samplelist))
- print(" self.samplelist: ", self.samplelist)
+ #print(" type: ", type(self.samplelist))
+ #print(" self.samplelist: ", self.samplelist)
else:
- print("Cache not hit")
+ #print("Cache not hit")
from utility.tools import plink_command
PLINK_PATH,PLINK_COMMAND = plink_command()
@@ -417,7 +417,7 @@ class DatasetGroup(object):
self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
else:
self.samplelist = None
- print("after get_samplelist")
+ #print("after get_samplelist")
Redis.set(key, json.dumps(self.samplelist))
Redis.expire(key, 60*5)
@@ -442,9 +442,9 @@ class DatasetGroup(object):
# reaper barfs on unicode filenames, so here we ensure it's a string
full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
if os.path.isfile(full_filename):
- print("Reading file: ", full_filename)
+ #print("Reading file: ", full_filename)
genotype_1.read(full_filename)
- print("File read")
+ #print("File read")
else:
try:
full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno'))
@@ -521,8 +521,6 @@ class DataSet(object):
self.group.get_samplelist()
self.species = species.TheSpecies(self)
- print("TESTING!!!")
-
def get_desc(self):
"""Gets overridden later, at least for Temp...used by trait's get_given_name"""
@@ -700,7 +698,7 @@ class PhenotypeDataSet(DataSet):
def setup(self):
- print("IS A PHENOTYPEDATASET")
+ #print("IS A PHENOTYPEDATASET")
# Fields in the database table
self.search_fields = ['Phenotype.Post_publication_description',
@@ -1032,7 +1030,7 @@ class MrnaAssayDataSet(DataSet):
#print("After get_trait_list query")
trait_data = {}
for trait in results:
- print("Retrieving sample_data for ", trait[0])
+ #print("Retrieving sample_data for ", trait[0])
trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
#print("After retrieve_sample_data")
return trait_data
diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py
index ff80795c..eb9bc940 100755
--- a/wqflask/base/trait.py
+++ b/wqflask/base/trait.py
@@ -31,16 +31,16 @@ class GeneralTrait(object):
"""
- def __init__(self, get_qtl_info=False, **kw):
+ def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
# xor assertion
assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
if kw.get('dataset_name'):
self.dataset = create_dataset(kw.get('dataset_name'))
- print(" in GeneralTrait created dataset:", self.dataset)
+ #print(" in GeneralTrait created dataset:", self.dataset)
else:
self.dataset = kw.get('dataset')
self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc.
- print("THE NAME IS:", self.name)
+ #print("THE NAME IS:", self.name)
self.cellid = kw.get('cellid')
self.identification = kw.get('identification', 'un-named trait')
self.haveinfo = kw.get('haveinfo', False)
@@ -67,7 +67,8 @@ class GeneralTrait(object):
# Todo: These two lines are necessary most of the time, but perhaps not all of the time
# So we could add a simple if statement to short-circuit this if necessary
self.retrieve_info(get_qtl_info=get_qtl_info)
- self.retrieve_sample_data()
+ if kw.get('get_sample_info') != False:
+ self.retrieve_sample_data()
def jsonable(self):
@@ -291,7 +292,7 @@ class GeneralTrait(object):
PublishFreeze.Id = %s
""" % (self.name, self.dataset.id)
- print("query is:", query)
+ #print("query is:", query)
trait_info = g.db.execute(query).fetchone()
#XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name
@@ -402,9 +403,9 @@ class GeneralTrait(object):
#self.cursor.execute(query)
#trait_qtl = self.cursor.fetchone()
if trait_qtl:
- print("trait_qtl:", trait_qtl)
+ #print("trait_qtl:", trait_qtl)
self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl
- print("self.locus:", self.locus)
+ #print("self.locus:", self.locus)
if self.locus:
query = """
select Geno.Chr, Geno.Mb from Geno, Species
diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py
index 3d693a4c..2a8ba25c 100755
--- a/wqflask/wqflask/gsearch.py
+++ b/wqflask/wqflask/gsearch.py
@@ -5,6 +5,8 @@ from base.data_set import create_dataset
from base.trait import GeneralTrait
from dbFunction import webqtlDatabaseFunction
+from utility.benchmark import Bench
+
class GSearch(object):
def __init__(self, kw):
@@ -37,17 +39,21 @@ class GSearch(object):
AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
AND ProbeSetFreeze.public > 0
ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
- LIMIT 1000
+ LIMIT 2000
""" % (self.terms)
- re = g.db.execute(sql).fetchall()
+ with Bench("Running query"):
+ re = g.db.execute(sql).fetchall()
self.trait_list = []
- for line in re:
- dataset = create_dataset(line[3], "ProbeSet")
- trait_id = line[4]
- this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True)
- self.trait_list.append(this_trait)
- species = webqtlDatabaseFunction.retrieve_species(dataset.group.name)
- dataset.get_trait_info([this_trait], species)
+ with Bench("Creating trait objects"):
+ for line in re:
+ dataset = create_dataset(line[3], "ProbeSet")
+ trait_id = line[4]
+ #with Bench("Building trait object"):
+ this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
+ self.trait_list.append(this_trait)
+ species = webqtlDatabaseFunction.retrieve_species(dataset.group.name)
+ #with Bench("Getting trait info"):
+ dataset.get_trait_info([this_trait], species)
elif self.type == "phenotype":
sql = """
diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html
index 29239adb..0d463d5b 100755
--- a/wqflask/wqflask/templates/show_trait.html
+++ b/wqflask/wqflask/templates/show_trait.html
@@ -9,7 +9,6 @@
-
@@ -134,10 +133,6 @@
-
-
-
-
@@ -200,10 +195,7 @@
{ "bSortable": false },
{ "type": "cust-txt" }
],
- "buttons": [
- 'csv'
- ],
- "sDom": "RZBtr",
+ "sDom": "RZtr",
"iDisplayLength": -1,
"autoWidth": false,
"bLengthChange": true,
@@ -226,10 +218,7 @@
null,
{ "type": "cust-txt" }
],
- "buttons": [
- 'csv'
- ],
- "sDom": "RZBtr",
+ "sDom": "RZtr",
"iDisplayLength": -1,
"autoWidth": false,
"bLengthChange": true,
--
cgit v1.2.3