diff options
author | zsloan | 2016-03-02 21:18:54 +0000 |
---|---|---|
committer | zsloan | 2016-03-02 21:18:54 +0000 |
commit | e75b1a63047549685c38471ae2294996ad52af16 (patch) | |
tree | faf5ae6c5d84d4cce618fd8143278d60bc42226f /wqflask/base | |
parent | d38f4c6c2176d935bcb4f235fb4eee8b3ad703c2 (diff) | |
download | genenetwork2-e75b1a63047549685c38471ae2294996ad52af16.tar.gz |
Increased gsearch speed by removing an unnecessary query for sample data.
It is still slower than preferable because the gn1 database has to be queried for every trait in the search results.
Diffstat (limited to 'wqflask/base')
-rwxr-xr-x | wqflask/base/data_set.py | 32 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 15 |
2 files changed, 23 insertions, 24 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 68a2a185..aaea7ec3 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -280,7 +280,7 @@ class DatasetGroup(object): """ def __init__(self, dataset): """This sets self.group and self.group_id""" - print("DATASET NAME2:", dataset.name) + #print("DATASET NAME2:", dataset.name) self.name, self.id = g.db.execute(dataset.query_for_group).fetchone() if self.name == 'BXD300': self.name = "BXD" @@ -366,7 +366,7 @@ class DatasetGroup(object): break if tissue_already_exists: - print("dataset_menu:", dataset_menu[i]['datasets']) + #print("dataset_menu:", dataset_menu[i]['datasets']) dataset_menu[i]['datasets'].append((dataset, dataset_short)) else: dataset_menu.append(dict(tissue=tissue_name, @@ -392,18 +392,18 @@ class DatasetGroup(object): def get_samplelist(self): key = "samplelist:v2:" + self.name - print("key is:", key) - with Bench("Loading cache"): - result = Redis.get(key) + #print("key is:", key) + #with Bench("Loading cache"): + result = Redis.get(key) if result: - print("Sample List Cache hit!!!") - print("Before unjsonifying {}: {}".format(type(result), result)) + #print("Sample List Cache hit!!!") + #print("Before unjsonifying {}: {}".format(type(result), result)) self.samplelist = json.loads(result) - print(" type: ", type(self.samplelist)) - print(" self.samplelist: ", self.samplelist) + #print(" type: ", type(self.samplelist)) + #print(" self.samplelist: ", self.samplelist) else: - print("Cache not hit") + #print("Cache not hit") from utility.tools import plink_command PLINK_PATH,PLINK_COMMAND = plink_command() @@ -417,7 +417,7 @@ class DatasetGroup(object): self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) else: self.samplelist = None - print("after get_samplelist") + #print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) @@ -442,9 +442,9 @@ class DatasetGroup(object): # reaper barfs on unicode 
filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) if os.path.isfile(full_filename): - print("Reading file: ", full_filename) + #print("Reading file: ", full_filename) genotype_1.read(full_filename) - print("File read") + #print("File read") else: try: full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) @@ -521,8 +521,6 @@ class DataSet(object): self.group.get_samplelist() self.species = species.TheSpecies(self) - print("TESTING!!!") - def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" @@ -700,7 +698,7 @@ class PhenotypeDataSet(DataSet): def setup(self): - print("IS A PHENOTYPEDATASET") + #print("IS A PHENOTYPEDATASET") # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', @@ -1032,7 +1030,7 @@ class MrnaAssayDataSet(DataSet): #print("After get_trait_list query") trait_data = {} for trait in results: - print("Retrieving sample_data for ", trait[0]) + #print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) #print("After retrieve_sample_data") return trait_data diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index ff80795c..eb9bc940 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -31,16 +31,16 @@ class GeneralTrait(object): """ - def __init__(self, get_qtl_info=False, **kw): + def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; if kw.get('dataset_name'): self.dataset = create_dataset(kw.get('dataset_name')) - print(" in GeneralTrait created dataset:", self.dataset) + #print(" in GeneralTrait created dataset:", self.dataset) else: self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. 
- print("THE NAME IS:", self.name) + #print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -67,7 +67,8 @@ class GeneralTrait(object): # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary self.retrieve_info(get_qtl_info=get_qtl_info) - self.retrieve_sample_data() + if kw.get('get_sample_info') != False: + self.retrieve_sample_data() def jsonable(self): @@ -291,7 +292,7 @@ class GeneralTrait(object): PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - print("query is:", query) + #print("query is:", query) trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name @@ -402,9 +403,9 @@ class GeneralTrait(object): #self.cursor.execute(query) #trait_qtl = self.cursor.fetchone() if trait_qtl: - print("trait_qtl:", trait_qtl) + #print("trait_qtl:", trait_qtl) self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl - print("self.locus:", self.locus) + #print("self.locus:", self.locus) if self.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species |