diff options
author | zsloan | 2016-03-02 21:18:54 +0000 |
---|---|---|
committer | zsloan | 2016-03-02 21:18:54 +0000 |
commit | e75b1a63047549685c38471ae2294996ad52af16 (patch) | |
tree | faf5ae6c5d84d4cce618fd8143278d60bc42226f /wqflask | |
parent | d38f4c6c2176d935bcb4f235fb4eee8b3ad703c2 (diff) | |
download | genenetwork2-e75b1a63047549685c38471ae2294996ad52af16.tar.gz |
Increased gsearch speed by removing the unnecessary query for sample data.
Still slower than preferable, because the gn1 database has to be queried for every trait in the search results.
Diffstat (limited to 'wqflask')
-rwxr-xr-x | wqflask/base/data_set.py | 32 | ||||
-rwxr-xr-x | wqflask/base/trait.py | 15 | ||||
-rwxr-xr-x | wqflask/wqflask/gsearch.py | 24 | ||||
-rwxr-xr-x | wqflask/wqflask/templates/show_trait.html | 15 |
4 files changed, 40 insertions, 46 deletions
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 68a2a185..aaea7ec3 100755 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -280,7 +280,7 @@ class DatasetGroup(object): """ def __init__(self, dataset): """This sets self.group and self.group_id""" - print("DATASET NAME2:", dataset.name) + #print("DATASET NAME2:", dataset.name) self.name, self.id = g.db.execute(dataset.query_for_group).fetchone() if self.name == 'BXD300': self.name = "BXD" @@ -366,7 +366,7 @@ class DatasetGroup(object): break if tissue_already_exists: - print("dataset_menu:", dataset_menu[i]['datasets']) + #print("dataset_menu:", dataset_menu[i]['datasets']) dataset_menu[i]['datasets'].append((dataset, dataset_short)) else: dataset_menu.append(dict(tissue=tissue_name, @@ -392,18 +392,18 @@ class DatasetGroup(object): def get_samplelist(self): key = "samplelist:v2:" + self.name - print("key is:", key) - with Bench("Loading cache"): - result = Redis.get(key) + #print("key is:", key) + #with Bench("Loading cache"): + result = Redis.get(key) if result: - print("Sample List Cache hit!!!") - print("Before unjsonifying {}: {}".format(type(result), result)) + #print("Sample List Cache hit!!!") + #print("Before unjsonifying {}: {}".format(type(result), result)) self.samplelist = json.loads(result) - print(" type: ", type(self.samplelist)) - print(" self.samplelist: ", self.samplelist) + #print(" type: ", type(self.samplelist)) + #print(" self.samplelist: ", self.samplelist) else: - print("Cache not hit") + #print("Cache not hit") from utility.tools import plink_command PLINK_PATH,PLINK_COMMAND = plink_command() @@ -417,7 +417,7 @@ class DatasetGroup(object): self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) else: self.samplelist = None - print("after get_samplelist") + #print("after get_samplelist") Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) @@ -442,9 +442,9 @@ class DatasetGroup(object): # reaper barfs on unicode 
filenames, so here we ensure it's a string full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) if os.path.isfile(full_filename): - print("Reading file: ", full_filename) + #print("Reading file: ", full_filename) genotype_1.read(full_filename) - print("File read") + #print("File read") else: try: full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) @@ -521,8 +521,6 @@ class DataSet(object): self.group.get_samplelist() self.species = species.TheSpecies(self) - print("TESTING!!!") - def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" @@ -700,7 +698,7 @@ class PhenotypeDataSet(DataSet): def setup(self): - print("IS A PHENOTYPEDATASET") + #print("IS A PHENOTYPEDATASET") # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', @@ -1032,7 +1030,7 @@ class MrnaAssayDataSet(DataSet): #print("After get_trait_list query") trait_data = {} for trait in results: - print("Retrieving sample_data for ", trait[0]) + #print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) #print("After retrieve_sample_data") return trait_data diff --git a/wqflask/base/trait.py b/wqflask/base/trait.py index ff80795c..eb9bc940 100755 --- a/wqflask/base/trait.py +++ b/wqflask/base/trait.py @@ -31,16 +31,16 @@ class GeneralTrait(object): """ - def __init__(self, get_qtl_info=False, **kw): + def __init__(self, get_qtl_info=False, get_sample_info=True, **kw): # xor assertion assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name"; if kw.get('dataset_name'): self.dataset = create_dataset(kw.get('dataset_name')) - print(" in GeneralTrait created dataset:", self.dataset) + #print(" in GeneralTrait created dataset:", self.dataset) else: self.dataset = kw.get('dataset') self.name = kw.get('name') # Trait ID, ProbeSet ID, Published ID, etc. 
- print("THE NAME IS:", self.name) + #print("THE NAME IS:", self.name) self.cellid = kw.get('cellid') self.identification = kw.get('identification', 'un-named trait') self.haveinfo = kw.get('haveinfo', False) @@ -67,7 +67,8 @@ class GeneralTrait(object): # Todo: These two lines are necessary most of the time, but perhaps not all of the time # So we could add a simple if statement to short-circuit this if necessary self.retrieve_info(get_qtl_info=get_qtl_info) - self.retrieve_sample_data() + if kw.get('get_sample_info') != False: + self.retrieve_sample_data() def jsonable(self): @@ -291,7 +292,7 @@ class GeneralTrait(object): PublishFreeze.Id = %s """ % (self.name, self.dataset.id) - print("query is:", query) + #print("query is:", query) trait_info = g.db.execute(query).fetchone() #XZ, 05/08/2009: Xiaodong add this block to use ProbeSet.Id to find the probeset instead of just using ProbeSet.Name @@ -402,9 +403,9 @@ class GeneralTrait(object): #self.cursor.execute(query) #trait_qtl = self.cursor.fetchone() if trait_qtl: - print("trait_qtl:", trait_qtl) + #print("trait_qtl:", trait_qtl) self.locus, self.lrs, self.pvalue, self.mean, self.additive= trait_qtl - print("self.locus:", self.locus) + #print("self.locus:", self.locus) if self.locus: query = """ select Geno.Chr, Geno.Mb from Geno, Species diff --git a/wqflask/wqflask/gsearch.py b/wqflask/wqflask/gsearch.py index 3d693a4c..2a8ba25c 100755 --- a/wqflask/wqflask/gsearch.py +++ b/wqflask/wqflask/gsearch.py @@ -5,6 +5,8 @@ from base.data_set import create_dataset from base.trait import GeneralTrait from dbFunction import webqtlDatabaseFunction +from utility.benchmark import Bench + class GSearch(object): def __init__(self, kw): @@ -37,17 +39,21 @@ class GSearch(object): AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id AND ProbeSetFreeze.public > 0 ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name - LIMIT 1000 + LIMIT 2000 """ % (self.terms) - re = g.db.execute(sql).fetchall() 
+ with Bench("Running query"): + re = g.db.execute(sql).fetchall() self.trait_list = [] - for line in re: - dataset = create_dataset(line[3], "ProbeSet") - trait_id = line[4] - this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True) - self.trait_list.append(this_trait) - species = webqtlDatabaseFunction.retrieve_species(dataset.group.name) - dataset.get_trait_info([this_trait], species) + with Bench("Creating trait objects"): + for line in re: + dataset = create_dataset(line[3], "ProbeSet") + trait_id = line[4] + #with Bench("Building trait object"): + this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False) + self.trait_list.append(this_trait) + species = webqtlDatabaseFunction.retrieve_species(dataset.group.name) + #with Bench("Getting trait info"): + dataset.get_trait_info([this_trait], species) elif self.type == "phenotype": sql = """ diff --git a/wqflask/wqflask/templates/show_trait.html b/wqflask/wqflask/templates/show_trait.html index 29239adb..0d463d5b 100755 --- a/wqflask/wqflask/templates/show_trait.html +++ b/wqflask/wqflask/templates/show_trait.html @@ -9,7 +9,6 @@ <link rel="stylesheet" type="text/css" href="/static/new/css/d3-tip.min.css" /> <link rel="stylesheet" type="text/css" href="/static/new/packages/nvd3/nv.d3.min.css" /> <link rel="stylesheet" type="text/css" href="/static/new/packages/DataTables/css/jquery.dataTables.css" /> - <link rel="stylesheet" type="text/css" href="/static/new/packages/DataTables/extensions/buttons.bootstrap.css" /> <link rel="stylesheet" type="text/css" href="/static/new/packages/noUiSlider/nouislider.css" /> <link rel="stylesheet" type="text/css" href="/static/new/packages/noUiSlider/nouislider.pips.css" /> <link rel="stylesheet" type="text/css" href="/static/packages/DT_bootstrap/DT_bootstrap.css" /> @@ -134,10 +133,6 @@ <script language="javascript" type="text/javascript" src="/static/new/packages/jsPDF/jspdf.plugin.from_html.js"></script> <script 
language="javascript" type="text/javascript" src="/static/new/packages/DataTables/js/jquery.dataTables.js"></script> - <script language="javascript" type="text/javascript" src="https://cdn.datatables.net/buttons/1.0.0/js/dataTables.buttons.min.js"></script> - <script language="javascript" type="text/javascript" src="https://cdn.datatables.net/buttons/1.0.0/js/buttons.html5.min.js"></script> - <script language="javascript" type="text/javascript" src="https://cdn.datatables.net/buttons/1.0.0/js/buttons.bootstrap.min.js"></script> - <script language="javascript" type="text/javascript" src="/static/new/js_external/jszip.min.js"></script> <script language="javascript" type="text/javascript" src="/static/new/packages/DataTables/js/dataTables.scientific.js"></script> <script language="javascript" type="text/javascript" src="/static/new/packages/DataTables/js/dataTables.naturalSort.js"></script> <script language="javascript" type="text/javascript" src="/static/new/packages/DataTables/extensions/dataTables.colResize.js"></script> @@ -200,10 +195,7 @@ { "bSortable": false }, { "type": "cust-txt" } ], - "buttons": [ - 'csv' - ], - "sDom": "RZBtr", + "sDom": "RZtr", "iDisplayLength": -1, "autoWidth": false, "bLengthChange": true, @@ -226,10 +218,7 @@ null, { "type": "cust-txt" } ], - "buttons": [ - 'csv' - ], - "sDom": "RZBtr", + "sDom": "RZtr", "iDisplayLength": -1, "autoWidth": false, "bLengthChange": true, |