Log: and document SQL calls

author: Pjotr Prins 2016-06-24 06:51:28 +0000
committer: Pjotr Prins 2016-06-24 06:51:28 +0000
commit: 764136ff18bd355f7b9dbe91f870919b0a17322e (patch)
tree: 2636b5c0d68d38e391476b4723a6f005d536b5c8
parent: 923e94c39f5768f836dbf5fc29b0186c13ccc86b (diff)
download: genenetwork2-764136ff18bd355f7b9dbe91f870919b0a17322e.tar.gz
7 files changed, 173 insertions, 35 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index bbb2a19f..d3bf3299 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -33,7 +33,7 @@ if [ -z $TEMPDIR ]; then
     TEMPDIR="/tmp"
 fi
                          
-# Start the redis server
+echo "Starting the redis server:"
 echo -n "dir $TEMPDIR
 dbfilename gn2.rdb
 " | redis-server - &
diff --git a/doc/Architecture.org b/doc/Architecture.org
index 7736e2c1..b9e8e355 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -34,6 +34,8 @@ search_result.py which invokes database functions in
 wqflask/dbFunction/webqtlDatabaseFunction.py, for example. The
 receiving template lives at [[https://github.com/genenetwork/genenetwork2/blob/master/wqflask/wqflask/templates/search_result_page.html][search_result_page.html]].
 
+For what happens at the database level see [[database.org]].
+
 ** GnServer (REST)
 
 The [[https://github.com/genenetwork/gn_server][GnServer REST API]] is built on high performance [[http://elixir-lang.org/][Elixir]] with [[https://github.com/falood/maru][Maru]].
diff --git a/doc/database.org b/doc/database.org
index 2221c4fb..753179e6 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -7,7 +7,7 @@ number of the BSD's and the Human liver dataset (GSE9588).
 
 * GeneNetwork database
 
-** Estimated table sizes 
+** Estimated table sizes
 
 
 select table_name,round(((data_length + index_length) / 1024 / 1024), 2) `Size in MB` from information_schema.TABLES where table_schema = "db_webqtl" order by data_length;
@@ -172,24 +172,24 @@ This table is being used by both GN1 and GN2 from the trait pages!
 gn1/web/webqtl/showTrait/ShowTraitPage.py:              query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
 gn1/web/webqtl/showTrait/ShowTraitPage.py:                      self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)" ,user_ip)
 gn1/web/webqtl/textUI/cmdClass.py:                      query = """SELECT count(id) FROM AccessLog WHERE ip_address = %s AND UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400"""
-gn1/web/webqtl/textUI/cmdClass.py:                      query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)""" 
+gn1/web/webqtl/textUI/cmdClass.py:                      query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)"""
 gn2/wqflask/wqflask/show_trait/show_trait_page.py:        query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
 gn2/wqflask/wqflask/show_trait/show_trait_page.py:        self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip)
 
 When looking at the code in GN1 and GN2 it restricts the daily use of
 the trait data page (set to 1,000 - whoever reaches that?). Unlike
 mentioned in the schema description, this table does *not* keep track
-of cookies. 
+of cookies.
 
 From the code it looks like GN2 uses a mixture of Redis and sqlalchemy
 to keep track of logged in sessions (see
-gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in 
+gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in
 model.py.
 
 In gn2/wqflask/wqflask/templates/collections/view_anonymous.html it
 show_trait_page appears to be loaded (need to check).
 
-** AvgMethod 
+** AvgMethod
 
 Probesetfreeze refers to AvgMethod
 
@@ -235,7 +235,7 @@ Hooked in API (URL encoding)
 
 GN2 only (see menu bar)
 
-** Ensembl* 
+** Ensembl*
 
 Probe information
 
@@ -321,15 +321,15 @@ Indel Snp browser (variant browser Gn1)
 
 ** Info*
 
-Infra system PhP 
+Infra system PhP
 
-Data Info button 
+Data Info button
 
 Infosystem users has separate entries
 
-Also Investigators, User, Organizations, 
+Also Investigators, User, Organizations,
 
-** LCorrRamin3 
+** LCorrRamin3
 
 Lit. Correlations Prof. Ramin
 
@@ -376,7 +376,7 @@ This table contains names, full descriptions, and short symbols for
 traits and phenotype used primarily in the Published Phenotypes
 databases.
 
-Contains 10k rows, March 2016, of which 5000 are for the BXDs). 
+Contains 10k rows, March 2016, of which 5000 are for the BXDs).
 
 | Id | Pre_publication_description | Post_publication_description                                                                                         | Original_description                                                                                                                                        | Units                | Pre_publication_abbreviation | Post_publication_abbreviation | Lab_code | Submitter   | Owner | Authorized_Users |
 +----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+
@@ -493,7 +493,7 @@ select count(*) from ProbeSet limit 5;
 
 ** ProbeSetData
 
-Probedata - main molecular data. Probesets, metabolome, 
+Probedata - main molecular data. Probesets, metabolome,
 
 Almost all important molecular assay data is in this table including
 probe set data, RNA-seq data, proteomic data, and metabolomic
@@ -599,7 +599,7 @@ select * from ProbeSetFreeze limit 5;
 
 Note that the following unlimited search is very slow:
 
-select max(value) from ProbeSetData; 
+select max(value) from ProbeSetData;
 
 +------------+
 | max(value) |
@@ -617,12 +617,12 @@ Suggestions on the schema page:
 
 "StrainId" should be "CaseId" or "SampleId".
 
-"ProbeSetData" should probably be "AssayData" or something more neutral. 
+"ProbeSetData" should probably be "AssayData" or something more neutral.
 
 *** Comments
 
 I think the ProbeSetData table should be generalized to a 'phenotypes'
-table with an 'sample_id' column and a 'value' column. 
+table with an 'sample_id' column and a 'value' column.
 
 A new table 'samples' will link each sample against an 'experiment',
 an 'individual' and which in turn can link to a 'strain'.
@@ -697,7 +697,7 @@ select * from Species;
 +----+-----------+----------------------+----------------+----------------------+-------------------------+------------+---------+
 
 
-** InbredSet 
+** InbredSet
 
 Menu
 
@@ -723,10 +723,128 @@ User selection - retained
 
 ** UserPrivilege
 
-** Vlookup 
+** Vlookup
 
 * Fetching Data
 
+** Menu
+
+What happens at the main search menu can be emulated with curl.
+
+*** Search Mouse mRNA from HC_M2_0606_P dataset
+
+: curl "http://localhost:5003/search?species=mouse&group=BXD&ppocampus+mRNA&dataset=HC_M2_0606_P&search_terms_or=&search_terms_and=MEAN%3D%2815+16%29+LRS%3D%2823+46%29+&FormID=searchResult"
+
+the first SQL query checks whether a dataset is confidential or not:
+
+INFO:base.data_set:.sql: geno_mrna_confidentiality: u'SELECT Id, Name,
+FullName, confidentiality,AuthorisedUsers FROM ProbeSetFreeze WHERE
+Name = "HC_M2_0606_P"'
+
+The following is executed once per search:
+
+INFO:db.call:.sql: retrieve_other_names:
+                        SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, Tissue.Name
+                        FROM ProbeSetFreeze, ProbeFreeze, Tissue
+                        WHERE ProbeSetFreeze.public > 0 AND
+                              ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+                              ProbeFreeze.TissueId = Tissue.Id AND
+                             (ProbeSetFreeze.Name = 'HC_M2_0606_P' OR ProbeSetFreeze.FullName = 'HC_M2_0606_P' OR ProbeSetFreeze.ShortName = 'HC_M2_0606_P')
+
+INFO:db.call:.sql: (112, 'HC_M2_0606_P', 'Hippocampus Consortium M430v2 (Jun06) PDNN', 'Hippocampus M430v2 BXD 06/06 PDNN', 'log2', 'Hippocampus mRNA')
+
+INFO:db.call:.sql: __init__:
+                        SELECT
+                                InbredSet.Name, InbredSet.Id
+                        FROM
+                                InbredSet, ProbeSetFreeze, ProbeFreeze
+                        WHERE
+                                ProbeFreeze.InbredSetId = InbredSet.Id AND
+                                ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
+                                ProbeSetFreeze.Name = "HC_M2_0606_P"
+
+INFO:db.call:.sql: ('BXD', 1)
+
+INFO:base.species:.sql: __init__:
+                Select
+                        Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
+                where
+                        Chr_Length.SpeciesId = InbredSet.SpeciesId AND
+                        InbredSet.Name = 'BXD'
+                Order by OrderId
+
+And the following are executed for every trait:
+
+INFO:wqflask.do_search:.sql: execute: SELECT distinct ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS, ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM, ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL, ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet WHERE (( ProbeSetXRef.mean > 15.0 and ProbeSetXRef.mean < 16.0 )AND( ProbeSetXRef.LRS > 23.0 and ProbeSetXRef.LRS < 46.0 )) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = 112 ORDER BY ProbeSet.symbol ASC
+
+INFO:base.trait:.sql: retrieve_info: SELECT ProbeSet.name,
+                    ProbeSet.symbol, ProbeSet.description,
+                    ProbeSet.probe_target_description, ProbeSet.chr,
+                    ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid,
+                    ProbeSet.genbankid, ProbeSet.unigeneid,
+                    ProbeSet.omim, ProbeSet.refseq_transcriptid,
+                    ProbeSet.blatseq, ProbeSet.targetseq,
+                    ProbeSet.chipid, ProbeSet.comments,
+                    ProbeSet.strand_probe, ProbeSet.strand_gene,
+                    ProbeSet.probe_set_target_region,
+                    ProbeSet.probe_set_specificity,
+                    ProbeSet.probe_set_blat_score,
+                    ProbeSet.probe_set_blat_mb_start,
+                    ProbeSet.probe_set_blat_mb_end,
+                    ProbeSet.probe_set_strand,
+                    ProbeSet.probe_set_note_by_rw, ProbeSet.flag
+
+                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+                    WHERE
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetFreeze.Name = 'HC_M2_0606_P' AND
+                            ProbeSet.Name = '1443823_s_at'
+
+INFO:base.trait:.sql: retrieve_info:
+                            SELECT
+                                    HomologeneId
+                            FROM
+                                    Homologene, Species, InbredSet
+                            WHERE
+                                    Homologene.GeneId =98660 AND
+                                    InbredSet.Name = 'BXD' AND
+                                    InbredSet.SpeciesId = Species.Id AND
+                                    Species.TaxonomyId = Homologene.TaxonomyId
+
+INFO:base.trait:.sql: retrieve_info:
+                            SELECT
+                                    ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
+                            FROM
+                                    ProbeSetXRef, ProbeSet
+                            WHERE
+                                    ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                                    ProbeSet.Name = "1443823_s_at" AND
+                                    ProbeSetXRef.ProbeSetFreezeId =112
+
+INFO:base.trait:.sql: retrieve_info:
+                                select Geno.Chr, Geno.Mb from Geno, Species
+                                where Species.Name = 'mouse' and
+                                Geno.Name = 'NES13033186' and
+                                Geno.SpeciesId = Species.Id
+
+
+and finally mean and chromosome positions are fetched a second time
+for every trait with
+
+INFO:base.data_set:.sql: get_trait_info: select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
+                where ProbeSetXRef.ProbeSetFreezeId = 112 and
+                ProbeSet.Id = ProbeSetXRef.ProbeSetId and
+                ProbeSet.Name = '1443823_s_at'
+
+INFO:base.data_set:.sql: get_trait_info:
+                    select Geno.Chr, Geno.Mb from Geno, Species
+                    where Species.Name = 'mouse' and
+                        Geno.Name = 'NES13033186' and
+                        Geno.SpeciesId = Species.Id
+
+(that is a bug!).
+
 ** Fetch phenotypes
 
 To get at phenotype data ProbeSetData is the main table (almost all
diff --git a/etc/default_settings.py b/etc/default_settings.py
index 29bc2133..d4eb5580 100644
--- a/etc/default_settings.py
+++ b/etc/default_settings.py
@@ -10,6 +10,9 @@
 #
 # Note also that in the near future we will additionally fetch
 # settings from a JSON file
+#
+# Note that values for False and 0 have to be strings here - otherwise
+# Flask won't pick them up
 
 import os
 import sys
@@ -38,7 +41,7 @@ WEBSERVER_BRANDING = None   # Set the branding (nyi)
 WEBSERVER_DEPLOY = None     # Deployment specifics (nyi)
 
 LOG_LEVEL       = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL)
-LOG_LEVEL_DEBUG = 1         # Debug log level (0-5) (nyi)
+LOG_LEVEL_DEBUG = '0'       # Debug log level (0-5, 0 = show all)
 LOG_SQL         = 'False'   # Log SQL/backend and GN_SERVER calls
 LOG_SQLALCHEMY  = 'False'
 LOG_BENCH       = True      # Log bench marks
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ffc5917a..c70738f7 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -711,12 +711,14 @@ class PhenotypeDataSet(DataSet):
             this_trait.LRS_location_value = 1000000
 
             if this_trait.lrs:
-                result = g.db.execute("""
+                query = """
                     select Geno.Chr, Geno.Mb from Geno, Species
                     where Species.Name = %s and
                         Geno.Name = %s and
                         Geno.SpeciesId = Species.Id
-                """, (species, this_trait.locus)).fetchone()
+                """ % (species, this_trait.locus)
+                logger.sql(query)
+                result = g.db.execute(query).fetchone()
 
                 if result:
                     if result[0] and result[1]:
@@ -1172,9 +1174,9 @@ def geno_mrna_confidentiality(ob):
     #logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
 
     query = '''SELECT Id, Name, FullName, confidentiality,
-                        AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table)
+                        AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table,ob.name)
     logger.sql(query)
-    result = g.db.execute(query, ob.name)
+    result = g.db.execute(query)
 
     (dataset_id,
      name,
diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py
index c62ea2fe..86ee1c52 100644
--- a/wqflask/utility/logger.py
+++ b/wqflask/utility/logger.py
@@ -32,7 +32,7 @@ from inspect import isfunction
 from pprint import pformat as pf
 from inspect import stack
 
-from utility.tools import LOG_LEVEL, LOG_SQL, LOG_FORMAT
+from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL, LOG_FORMAT
 
 class GNLogger:
     """A logger class with some additional functionality, such as
@@ -48,9 +48,13 @@ class GNLogger:
         """Set the undelying log level"""
         self.logger.setLevel(value)
 
-    def debug(self,*args):
-        """Call logging.debug for multiple args"""
-        self.collect(self.logger.debug,*args)
+    def debug(self,level=0,*args):
+        """Call logging.debug for multiple args. Use level=num to filter on
+LOG_LEVEL_DEBUG.
+
+        """
+        if level <= LOG_LEVEL_DEBUG:
+            self.collect(self.logger.debug,*args)
 
     def info(self,*args):
         """Call logging.info for multiple args"""
@@ -71,16 +75,20 @@ class GNLogger:
         if self.logger.getEffectiveLevel() < 30:
             self.collectf(self.logger.debug,*args)
 
-    def debugf(self,*args):
+    def debugf(self,level=0,*args):
         """Call logging.debug for multiple args lazily"""
         # only evaluate function when logging
-        if self.logger.getEffectiveLevel() < 20:
-            self.collectf(self.logger.debug,*args)
+        if level <= LOG_LEVEL_DEBUG:
+            if self.logger.getEffectiveLevel() < 20:
+                self.collectf(self.logger.debug,*args)
 
     def sql(self, sqlcommand, fun = None):
         """Log SQL command, optionally invoking a timed fun"""
         if LOG_SQL:
-            self.info(stack()[1][3],sqlcommand)
+            caller = stack()[1][3]
+            if caller in ['fetchone','fetch1','fetchall']:
+                caller = stack()[2][3]
+            self.info(caller,sqlcommand)
         if fun:
             result = fun(sqlcommand)
             if LOG_SQL:
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index 93b1fa51..bb8241f5 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -43,12 +43,13 @@ def get_setting(command_id,guess=None):
     # ---- Check whether environment exists
     logger.debug("Looking for "+command_id+"\n")
     command = value(os.environ.get(command_id))
-    if command == None or command == "":
+    if command is None or command == "":
         # ---- Check whether setting exists in app
         command = value(app.config.get(command_id))
-        if command == None:
+        if command is None:
             command = value(guess)
-            if command == None or command == "":
+            if command is None or command == "":
+                print command
                 raise Exception(command_id+' setting unknown or faulty (update default_settings.py?).')
     logger.debug("Set "+command_id+"="+str(command))
     return command
@@ -61,7 +62,11 @@ def get_setting_bool(id):
 
 def get_setting_int(id):
     v = get_setting(id)
-    return int(v)
+    if isinstance(v, str):
+        return int(v)
+    if v is None:
+        return 0
+    return v
 
 def valid_bin(bin):
     if os.path.islink(bin) or valid_file(bin):
author	Pjotr Prins	2016-06-24 06:51:28 +0000
committer	Pjotr Prins	2016-06-24 06:51:28 +0000
commit	764136ff18bd355f7b9dbe91f870919b0a17322e (patch)
tree	2636b5c0d68d38e391476b4723a6f005d536b5c8
parent	923e94c39f5768f836dbf5fc29b0186c13ccc86b (diff)
download	genenetwork2-764136ff18bd355f7b9dbe91f870919b0a17322e.tar.gz