From e75b1a63047549685c38471ae2294996ad52af16 Mon Sep 17 00:00:00 2001 From: zsloan Date: Wed, 2 Mar 2016 21:18:54 +0000 Subject: Increased gsearch speed by removing unnecessary query for sample data Still slower than preferable due to having to query the gn1 database for every trait in the search results --- wqflask/base/data_set.py | 32 +++++++++++++++----------------- wqflask/base/trait.py | 15 ++++++++------- 2 files changed, 23 insertions(+), 24 deletions(-) (limited to 'wqflask/base') diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 68a2a185..aaea7ec3 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -280,7 +280,7 @@ class DatasetGroup(object): """ def __init__(self, dataset): """This sets self.group and self.group_id""" - print("DATASET NAME2:", dataset.name) + #print("DATASET NAME2:", dataset.name) self.name, self.id = g.db.execute(dataset.query_for_group).fetchone() if self.name == 'BXD300': self.name = "BXD" @@ -366,7 +366,7 @@ class DatasetGroup(object): break if tissue_already_exists: - print("dataset_menu:", dataset_menu[i]['datasets']) + #print("dataset_menu:", dataset_menu[i]['datasets']) dataset_menu[i]['datasets'].append((dataset, dataset_short)) else: dataset_menu.append(dict(tissue=tissue_name, @@ -392,18 +392,18 @@ class DatasetGroup(object): def get_samplelist(self): key = "samplelist:v2:" + self.name - print("key is:", key) - with Bench("Loading cache"): - result = Redis.get(key) + #print("key is:", key) + #with Bench("Loading cache"): + result = Redis.get(key) if result: - print("Sample List Cache hit!!!") - print("Before unjsonifying {}: {}".format(type(result), result)) + #print("Sample List Cache hit!!!") + #print("Before unjsonifying {}: {}".format(type(result), result)) self.samplelist = json.loads(result) - print(" type: ", type(self.samplelist)) - print(" self.samplelist: ", self.samplelist) + #print(" type: ", type(self.samplelist)) + #print(" self.samplelist: ", self.samplelist) else: - print("Cache not hit") + #print("Cache not hit") from utility.tools import plink_command PLINK_PATH,PLINK_COMMAND = plink_command() @@ -417,7 +417,7 @@ class DatasetGroup(object): self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) else: self.samplelist = None - print("after get_samplelist") + #print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) @@ -442,9 +442,9 @@ class DatasetGroup(object): # reaper barfs on unicode filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) if os.path.isfile(full_filename): - print("Reading file: ", full_filename) + #print("Reading file: ", full_filename) genotype_1.read(full_filename) - print("File read") + #print("File read") else: try: full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) @@ -521,8 +521,6 @@ class DataSet(object): self.group.get_samplelist() self.species = species.TheSpecies(self) - print("TESTING!!!") - def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" @@ -700,7 +698,7 @@ class PhenotypeDataSet(DataSet): def setup(self): - print("IS A PHENOTYPEDATASET") + #print("IS A PHENOTYPEDATASET") # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', @@ -1032,7 +1030,7 @@ class MrnaAssayDataSet(DataSet): #print("After get_trait_list query") trait_data = {} for trait in results: - print("Retrieving sample_data for ", trait[0]) + #print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) #print("After retrieve_sample_data") return trait_data diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index ff80795c..eb9bc940 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -31,16 +31,16 @@ class GeneralTrait(object): """ - def __init__(self, get_qtl_info=False, **kw): + def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; if kw.get('dataset_name'): self.dataset = create_dataset(kw.get('dataset_name')) - print(" in GeneralTrait created dataset:", self.dataset) + #print(" in GeneralTrait created dataset:", self.dataset) else: self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. - print("THE NAME IS:", self.name) + #print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -67,7 +67,8 @@ class GeneralTrait(object): # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary self.retrieve_info(get_qtl_info=get_qtl_info) - self.retrieve_sample_data() + if kw.get('get_sample_info') != False: + self.retrieve_sample_data() def jsonable(self): @@ -291,7 +292,7 @@ class GeneralTrait(object): PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - print("query is:", query) + #print("query is:", query) trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name @@ -402,9 +403,9 @@ class GeneralTrait(object): #self.cursor.execute(query) #trait_qtl = self.cursor.fetchone() if trait_qtl: - print("trait_qtl:", trait_qtl) + #print("trait_qtl:", trait_qtl) self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl - print("self.locus:", self.locus) + #print("self.locus:", self.locus) if self.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species -- cgit v1.2.3