From e75b1a63047549685c38471ae2294996ad52af16 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 2 Mar 2016 21:18:54 +0000 Subject: Increased gsearch speed by removing unnecessary query for sample data Still slower than preferable due to having to query the gn1 database for every trait in the search results --- wqflask/base/data_set.py | 32 +++++++++++++++---------------- wqflask/base/trait.py | 15 ++++++++------- wqflask/wqflask/gsearch.py | 24 ++++++++++++++--------- wqflask/wqflask/templates/show_trait.html | 15 ++------------- 4 files changed, 40 insertions(+), 46 deletions(-) diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 68a2a185..aaea7ec3 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -280,7 +280,7 @@ class DatasetGroup(object): """ def __init__(self, dataset): """This sets self.group and self.group_id""" - print("DATASET NAME2:", dataset.name) + #print("DATASET NAME2:", dataset.name) self.name, self.id = g.db.execute(dataset.query_for_group).fetchone() if self.name == 'BXD300': self.name = "BXD" @@ -366,7 +366,7 @@ class DatasetGroup(object): break if tissue_already_exists: - print("dataset_menu:", dataset_menu[i]['datasets']) + #print("dataset_menu:", dataset_menu[i]['datasets']) dataset_menu[i]['datasets'].append((dataset, dataset_short)) else: dataset_menu.append(dict(tissue=tissue_name, @@ -392,18 +392,18 @@ class DatasetGroup(object): def get_samplelist(self): key = "samplelist:v2:" + self.name - print("key is:", key) - with Bench("Loading cache"): - result = Redis.get(key) + #print("key is:", key) + #with Bench("Loading cache"): + result = Redis.get(key) if result: - print("Sample List Cache hit!!!") - print("Before unjsonifying {}: {}".format(type(result), result)) + #print("Sample List Cache hit!!!") + #print("Before unjsonifying {}: {}".format(type(result), result)) self.samplelist = json.loads(result) - print(" type: ", type(self.samplelist)) - print(" self.samplelist: ", self.samplelist) + #print(" type: ", type(self.samplelist)) + #print(" self.samplelist: ", self.samplelist) else: - print("Cache not hit") + #print("Cache not hit") from utility.tools import plink_command PLINK_PATH,PLINK_COMMAND = plink_command() @@ -417,7 +417,7 @@ class DatasetGroup(object): self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) else: self.samplelist = None - print("after get_samplelist") + #print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) @@ -442,9 +442,9 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) if os.path.isfile(full_filename): - print("Reading file: ", full_filename) + #print("Reading file: ", full_filename) genotype_1.read(full_filename) - print("File read") + #print("File read") else: try: full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) @@ -521,8 +521,6 @@ class DataSet(object): self.group.get_samplelist() self.species = species.TheSpecies(self) - print("TESTING!!!") - def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" @@ -700,7 +698,7 @@ class PhenotypeDataSet(DataSet): def setup(self): - print("IS A PHENOTYPEDATASET") + #print("IS A PHENOTYPEDATASET") # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', @@ -1032,7 +1030,7 @@ class MrnaAssayDataSet(DataSet): #print("After get_trait_list query") trait_data = {} for trait in results: - print("Retrieving sample_data for ", trait[0]) + #print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) #print("After retrieve_sample_data") return trait_data diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index ff80795c..eb9bc940 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -31,16 +31,16 @@ class GeneralTrait(object): """ - def __init__(self, get_qtl_info=False, **kw): + def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; if kw.get('dataset_name'): self.dataset = create_dataset(kw.get('dataset_name')) - print(" in GeneralTrait created dataset:", self.dataset) + #print(" in GeneralTrait created dataset:", self.dataset) else: self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. - print("THE NAME IS:", self.name) + #print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -67,7 +67,8 @@ class GeneralTrait(object): # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary self.retrieve_info(get_qtl_info=get_qtl_info) - self.retrieve_sample_data() + if kw.get('get_sample_info') != False: + self.retrieve_sample_data() def jsonable(self): @@ -291,7 +292,7 @@ class GeneralTrait(object): PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - print("query is:", query) + #print("query is:", query) trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name @@ -402,9 +403,9 @@ class GeneralTrait(object): #self.cursor.execute(query) #trait_qtl = self.cursor.fetchone() if trait_qtl: - print("trait_qtl:", trait_qtl) + #print("trait_qtl:", trait_qtl) self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl - print("self.locus:", self.locus) + #print("self.locus:", self.locus) if self.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 3d693a4c..2a8ba25c 100755 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -5,6 +5,8 @@ from base.data_set import create_dataset from base.trait import GeneralTrait from dbFunction import webqtlDatabaseFunction +from utility.benchmark import Bench + class GSearch(object): def __init__(self, kw): @@ -37,17 +39,21 @@ class GSearch(object): AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id AND ProbeSetFreeze.public > 0 ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name - LIMIT 1000 + LIMIT 2000 """ % (self.terms) - re = g.db.execute(sql).fetchall() + with Bench("Running query"): + re = g.db.execute(sql).fetchall() self.trait_list = [] - for line in re: - dataset = create_dataset(line[3], "ProbeSet") - trait_id = line[4] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True) - self.trait_list.append(this_trait) - species = webqtlDatabaseFunction.retrieve_species(dataset.group.name) - dataset.get_trait_info([this_trait], species) + with Bench("Creating trait objects"): + for line in re: + dataset = create_dataset(line[3], "ProbeSet") + trait_id = line[4] + #with Bench("Building trait object"): + this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + self.trait_list.append(this_trait) + species = webqtlDatabaseFunction.retrieve_species(dataset.group.name) + #with Bench("Getting trait info"): + dataset.get_trait_info([this_trait], species) elif self.type == "phenotype": sql = """ diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 29239adb..0d463d5b 100755 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -9,7 +9,6 @@ - @@ -134,10 +133,6 @@ - - - - @@ -200,10 +195,7 @@ { "bSortable": false }, { "type": "cust-txt" } ], - "buttons": [ - 'csv' - ], - "sDom": "RZBtr", + "sDom": "RZtr", "iDisplayLength": -1, "autoWidth": false, "bLengthChange": true, @@ -226,10 +218,7 @@ null, { "type": "cust-txt" } ], - "buttons": [ - 'csv' - ], - "sDom": "RZBtr", + "sDom": "RZtr", "iDisplayLength": -1, "autoWidth": false, "bLengthChange": true, -- cgit v1.2.3