From 764136ff18bd355f7b9dbe91f870919b0a17322e Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Fri, 24 Jun 2016 06:51:28 +0000 Subject: Log: and document SQL calls --- bin/genenetwork2 | 2 +- doc/Architecture.org | 2 + doc/database.org | 152 ++++++++++++++++++++++++++++++++++++++++------ etc/default_settings.py | 5 +- wqflask/base/data_set.py | 10 +-- wqflask/utility/logger.py | 24 +++++--- wqflask/utility/tools.py | 13 ++-- 7 files changed, 173 insertions(+), 35 deletions(-) diff --git a/bin/genenetwork2 b/bin/genenetwork2 index bbb2a19f..d3bf3299 100755 --- a/bin/genenetwork2 +++ b/bin/genenetwork2 @@ -33,7 +33,7 @@ if [ -z $TEMPDIR ]; then TEMPDIR="/tmp" fi -# Start the redis server +echo "Starting the redis server:" echo -n "dir $TEMPDIR dbfilename gn2.rdb " | redis-server - & diff --git a/doc/Architecture.org b/doc/Architecture.org index 7736e2c1..b9e8e355 100644 --- a/doc/Architecture.org +++ b/doc/Architecture.org @@ -34,6 +34,8 @@ search_result.py which invokes database functions in wqflask/dbFunction/webqtlDatabaseFunction.py, for example. The receiving template lives at [[https://github.com/genenetwork/genenetwork2/blob/master/wqflask/wqflask/templates/search_result_page.html][search_result_page.html]]. +For what happens at the database level see [[database.org]]. + ** GnServer (REST) The [[https://github.com/genenetwork/gn_server][GnServer REST API]] is built on high performance [[http://elixir-lang.org/][Elixir]] with [[https://github.com/falood/maru][Maru]]. diff --git a/doc/database.org b/doc/database.org index 2221c4fb..753179e6 100644 --- a/doc/database.org +++ b/doc/database.org @@ -7,7 +7,7 @@ number of the BSD's and the Human liver dataset (GSE9588). * GeneNetwork database -** Estimated table sizes +** Estimated table sizes select table_name,round(((data_length + index_length) / 1024 / 1024), 2) `Size in MB` from information_schema.TABLES where table_schema = "db_webqtl" order by data_length; @@ -172,24 +172,24 @@ This table is being used by both GN1 and GN2 from the trait pages! gn1/web/webqtl/showTrait/ShowTraitPage.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \ gn1/web/webqtl/showTrait/ShowTraitPage.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)" ,user_ip) gn1/web/webqtl/textUI/cmdClass.py: query = """SELECT count(id) FROM AccessLog WHERE ip_address = %s AND UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400""" -gn1/web/webqtl/textUI/cmdClass.py: query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)""" +gn1/web/webqtl/textUI/cmdClass.py: query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)""" gn2/wqflask/wqflask/show_trait/show_trait_page.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \ gn2/wqflask/wqflask/show_trait/show_trait_page.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip) When looking at the code in GN1 and GN2 it restricts the daily use of the trait data page (set to 1,000 - whoever reaches that?). Unlike mentioned in the schema description, this table does *not* keep track -of cookies. +of cookies. From the code it looks like GN2 uses a mixture of Redis and sqlalchemy to keep track of logged in sessions (see -gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in +gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in model.py. In gn2/wqflask/wqflask/templates/collections/view_anonymous.html it show_trait_page appears to be loaded (need to check). -** AvgMethod +** AvgMethod Probesetfreeze refers to AvgMethod @@ -235,7 +235,7 @@ Hooked in API (URL encoding) GN2 only (see menu bar) -** Ensembl* +** Ensembl* Probe information @@ -321,15 +321,15 @@ Indel Snp browser (variant browser Gn1) ** Info* -Infra system PhP +Infra system PhP -Data Info button +Data Info button Infosystem users has separate entries -Also Investigators, User, Organizations, +Also Investigators, User, Organizations, -** LCorrRamin3 +** LCorrRamin3 Lit. Correlations Prof. Ramin @@ -376,7 +376,7 @@ This table contains names, full descriptions, and short symbols for traits and phenotype used primarily in the Published Phenotypes databases. -Contains 10k rows, March 2016, of which 5000 are for the BXDs). +Contains 10k rows, March 2016, of which 5000 are for the BXDs). | Id | Pre_publication_description | Post_publication_description | Original_description | Units | Pre_publication_abbreviation | Post_publication_abbreviation | Lab_code | Submitter | Owner | Authorized_Users | +----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+ @@ -493,7 +493,7 @@ select count(*) from ProbeSet limit 5; ** ProbeSetData -Probedata - main molecular data. Probesets, metabolome, +Probedata - main molecular data. Probesets, metabolome, Almost all important molecular assay data is in this table including probe set data, RNA-seq data, proteomic data, and metabolomic @@ -599,7 +599,7 @@ select * from ProbeSetFreeze limit 5; Note that the following unlimited search is very slow: -select max(value) from ProbeSetData; +select max(value) from ProbeSetData; +------------+ | max(value) | @@ -617,12 +617,12 @@ Suggestions on the schema page: "StrainId" should be "CaseId" or "SampleId". -"ProbeSetData" should probably be "AssayData" or something more neutral. +"ProbeSetData" should probably be "AssayData" or something more neutral. *** Comments I think the ProbeSetData table should be generalized to a 'phenotypes' -table with an 'sample_id' column and a 'value' column. +table with an 'sample_id' column and a 'value' column. A new table 'samples' will link each sample against an 'experiment', an 'individual' and which in turn can link to a 'strain'. @@ -697,7 +697,7 @@ select * from Species; +----+-----------+----------------------+----------------+----------------------+-------------------------+------------+---------+ -** InbredSet +** InbredSet Menu @@ -723,10 +723,128 @@ User selection - retained ** UserPrivilege -** Vlookup +** Vlookup * Fetching Data +** Menu + +What happens at the main search menu can be emulated with curl. + +*** Search Mouse mRNA from HC_M2_0606_P dataset + +: curl "http://localhost:5003/search?species=mouse&group=BXD&ppocampus+mRNA&dataset=HC_M2_0606_P&search_terms_or=&search_terms_and=MEAN%3D%2815+16%29+LRS%3D%2823+46%29+&FormID=searchResult" + +the first SQL query checks whether a dataset is confidential or not: + +INFO:base.data_set:.sql: geno_mrna_confidentiality: u'SELECT Id, Name, +FullName, confidentiality,AuthorisedUsers FROM ProbeSetFreeze WHERE +Name = "HC_M2_0606_P"' + +The following is executed once per search: + +INFO:db.call:.sql: retrieve_other_names: + SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, Tissue.Name + FROM ProbeSetFreeze, ProbeFreeze, Tissue + WHERE ProbeSetFreeze.public > 0 AND + ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND + ProbeFreeze.TissueId = Tissue.Id AND + (ProbeSetFreeze.Name = 'HC_M2_0606_P' OR ProbeSetFreeze.FullName = 'HC_M2_0606_P' OR ProbeSetFreeze.ShortName = 'HC_M2_0606_P') + +INFO:db.call:.sql: (112, 'HC_M2_0606_P', 'Hippocampus Consortium M430v2 (Jun06) PDNN', 'Hippocampus M430v2 BXD 06/06 PDNN', 'log2', 'Hippocampus mRNA') + +INFO:db.call:.sql: __init__: + SELECT + InbredSet.Name, InbredSet.Id + FROM + InbredSet, ProbeSetFreeze, ProbeFreeze + WHERE + ProbeFreeze.InbredSetId = InbredSet.Id AND + ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND + ProbeSetFreeze.Name = "HC_M2_0606_P" + +INFO:db.call:.sql: ('BXD', 1) + +INFO:base.species:.sql: __init__: + Select + Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet + where + Chr_Length.SpeciesId = InbredSet.SpeciesId AND + InbredSet.Name = 'BXD' + Order by OrderId + +And the following are executed for every trait: + +INFO:wqflask.do_search:.sql: execute: SELECT distinct ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS, ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM, ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL, ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet WHERE (( ProbeSetXRef.mean > 15.0 and ProbeSetXRef.mean < 16.0 )AND( ProbeSetXRef.LRS > 23.0 and ProbeSetXRef.LRS < 46.0 )) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = 112 ORDER BY ProbeSet.symbol ASC + +INFO:base.trait:.sql: retrieve_info: SELECT ProbeSet.name, + ProbeSet.symbol, ProbeSet.description, + ProbeSet.probe_target_description, ProbeSet.chr, + ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid, + ProbeSet.genbankid, ProbeSet.unigeneid, + ProbeSet.omim, ProbeSet.refseq_transcriptid, + ProbeSet.blatseq, ProbeSet.targetseq, + ProbeSet.chipid, ProbeSet.comments, + ProbeSet.strand_probe, ProbeSet.strand_gene, + ProbeSet.probe_set_target_region, + ProbeSet.probe_set_specificity, + ProbeSet.probe_set_blat_score, + ProbeSet.probe_set_blat_mb_start, + ProbeSet.probe_set_blat_mb_end, + ProbeSet.probe_set_strand, + ProbeSet.probe_set_note_by_rw, ProbeSet.flag + + FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef + WHERE + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetFreeze.Name = 'HC_M2_0606_P' AND + ProbeSet.Name = '1443823_s_at' + +INFO:base.trait:.sql: retrieve_info: + SELECT + HomologeneId + FROM + Homologene, Species, InbredSet + WHERE + Homologene.GeneId =98660 AND + InbredSet.Name = 'BXD' AND + InbredSet.SpeciesId = Species.Id AND + Species.TaxonomyId = Homologene.TaxonomyId + +INFO:base.trait:.sql: retrieve_info: + SELECT + ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive + FROM + ProbeSetXRef, ProbeSet + WHERE + ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSet.Name = "1443823_s_at" AND + ProbeSetXRef.ProbeSetFreezeId =112 + +INFO:base.trait:.sql: retrieve_info: + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = 'mouse' and + Geno.Name = 'NES13033186' and + Geno.SpeciesId = Species.Id + + +and finally mean and chromosome positions are fetched a second time +for every trait with + +INFO:base.data_set:.sql: get_trait_info: select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet + where ProbeSetXRef.ProbeSetFreezeId = 112 and + ProbeSet.Id = ProbeSetXRef.ProbeSetId and + ProbeSet.Name = '1443823_s_at' + +INFO:base.data_set:.sql: get_trait_info: + select Geno.Chr, Geno.Mb from Geno, Species + where Species.Name = 'mouse' and + Geno.Name = 'NES13033186' and + Geno.SpeciesId = Species.Id + +(that is a bug!). + ** Fetch phenotypes To get at phenotype data ProbeSetData is the main table (almost all diff --git a/etc/default_settings.py b/etc/default_settings.py index 29bc2133..d4eb5580 100644 --- a/etc/default_settings.py +++ b/etc/default_settings.py @@ -10,6 +10,9 @@ # # Note also that in the near future we will additionally fetch # settings from a JSON file +# +# Note that values for False and 0 have to be strings here - otherwise +# Flask won't pick them up import os import sys @@ -38,7 +41,7 @@ WEBSERVER_BRANDING = None # Set the branding (nyi) WEBSERVER_DEPLOY = None # Deployment specifics (nyi) LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL) -LOG_LEVEL_DEBUG = 1 # Debug log level (0-5) (nyi) +LOG_LEVEL_DEBUG = '0' # Debug log level (0-5, 0 = show all) LOG_SQL = 'False' # Log SQL/backend and GN_SERVER calls LOG_SQLALCHEMY = 'False' LOG_BENCH = True # Log bench marks diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index ffc5917a..c70738f7 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -711,12 +711,14 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_location_value = 1000000 if this_trait.lrs: - result = g.db.execute(""" + query = """ select Geno.Chr, Geno.Mb from Geno, Species where Species.Name = %s and Geno.Name = %s and Geno.SpeciesId = Species.Id - """, (species, this_trait.locus)).fetchone() + """ % (species, this_trait.locus) + logger.sql(query) + result = g.db.execute(query).fetchone() if result: if result[0] and result[1]: @@ -1172,9 +1174,9 @@ def geno_mrna_confidentiality(ob): #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table)) query = '''SELECT Id, Name, FullName, confidentiality, - AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table) + AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table,ob.name) logger.sql(query) - result = g.db.execute(query, ob.name) + result = g.db.execute(query) (dataset_id, name, diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py index c62ea2fe..86ee1c52 100644 --- a/wqflask/utility/logger.py +++ b/wqflask/utility/logger.py @@ -32,7 +32,7 @@ from inspect import isfunction from pprint import pformat as pf from inspect import stack -from utility.tools import LOG_LEVEL, LOG_SQL, LOG_FORMAT +from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL, LOG_FORMAT class GNLogger: """A logger class with some additional functionality, such as @@ -48,9 +48,13 @@ class GNLogger: """Set the undelying log level""" self.logger.setLevel(value) - def debug(self,*args): - """Call logging.debug for multiple args""" - self.collect(self.logger.debug,*args) + def debug(self,level=0,*args): + """Call logging.debug for multiple args. Use level=num to filter on +LOG_LEVEL_DEBUG. + + """ + if level <= LOG_LEVEL_DEBUG: + self.collect(self.logger.debug,*args) def info(self,*args): """Call logging.info for multiple args""" @@ -71,16 +75,20 @@ class GNLogger: if self.logger.getEffectiveLevel() < 30: self.collectf(self.logger.debug,*args) - def debugf(self,*args): + def debugf(self,level=0,*args): """Call logging.debug for multiple args lazily""" # only evaluate function when logging - if self.logger.getEffectiveLevel() < 20: - self.collectf(self.logger.debug,*args) + if level <= LOG_LEVEL_DEBUG: + if self.logger.getEffectiveLevel() < 20: + self.collectf(self.logger.debug,*args) def sql(self, sqlcommand, fun = None): """Log SQL command, optionally invoking a timed fun""" if LOG_SQL: - self.info(stack()[1][3],sqlcommand) + caller = stack()[1][3] + if caller in ['fetchone','fetch1','fetchall']: + caller = stack()[2][3] + self.info(caller,sqlcommand) if fun: result = fun(sqlcommand) if LOG_SQL: diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index 93b1fa51..bb8241f5 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -43,12 +43,13 @@ def get_setting(command_id,guess=None): # ---- Check whether environment exists logger.debug("Looking for "+command_id+"\n") command = value(os.environ.get(command_id)) - if command == None or command == "": + if command is None or command == "": # ---- Check whether setting exists in app command = value(app.config.get(command_id)) - if command == None: + if command is None: command = value(guess) - if command == None or command == "": + if command is None or command == "": + print command raise Exception(command_id+' setting unknown or faulty (update default_settings.py?).') logger.debug("Set "+command_id+"="+str(command)) return command @@ -61,7 +62,11 @@ def get_setting_bool(id): def get_setting_int(id): v = get_setting(id) - return int(v) + if isinstance(v, str): + return int(v) + if v is None: + return 0 + return v def valid_bin(bin): if os.path.islink(bin) or valid_file(bin): -- cgit v1.2.3