aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPjotr Prins2016-06-24 06:51:28 +0000
committerPjotr Prins2016-06-24 06:51:28 +0000
commit764136ff18bd355f7b9dbe91f870919b0a17322e (patch)
tree2636b5c0d68d38e391476b4723a6f005d536b5c8
parent923e94c39f5768f836dbf5fc29b0186c13ccc86b (diff)
downloadgenenetwork2-764136ff18bd355f7b9dbe91f870919b0a17322e.tar.gz
Log: and document SQL calls
-rwxr-xr-xbin/genenetwork22
-rw-r--r--doc/Architecture.org2
-rw-r--r--doc/database.org152
-rw-r--r--etc/default_settings.py5
-rw-r--r--wqflask/base/data_set.py10
-rw-r--r--wqflask/utility/logger.py24
-rw-r--r--wqflask/utility/tools.py13
7 files changed, 173 insertions, 35 deletions
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index bbb2a19f..d3bf3299 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -33,7 +33,7 @@ if [ -z $TEMPDIR ]; then
TEMPDIR="/tmp"
fi
-# Start the redis server
+echo "Starting the redis server:"
echo -n "dir $TEMPDIR
dbfilename gn2.rdb
" | redis-server - &
diff --git a/doc/Architecture.org b/doc/Architecture.org
index 7736e2c1..b9e8e355 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -34,6 +34,8 @@ search_result.py which invokes database functions in
wqflask/dbFunction/webqtlDatabaseFunction.py, for example. The
receiving template lives at [[https://github.com/genenetwork/genenetwork2/blob/master/wqflask/wqflask/templates/search_result_page.html][search_result_page.html]].
+For what happens at the database level see [[database.org]].
+
** GnServer (REST)
The [[https://github.com/genenetwork/gn_server][GnServer REST API]] is built on high performance [[http://elixir-lang.org/][Elixir]] with [[https://github.com/falood/maru][Maru]].
diff --git a/doc/database.org b/doc/database.org
index 2221c4fb..753179e6 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -7,7 +7,7 @@ number of the BSD's and the Human liver dataset (GSE9588).
* GeneNetwork database
-** Estimated table sizes
+** Estimated table sizes
select table_name,round(((data_length + index_length) / 1024 / 1024), 2) `Size in MB` from information_schema.TABLES where table_schema = "db_webqtl" order by data_length;
@@ -172,24 +172,24 @@ This table is being used by both GN1 and GN2 from the trait pages!
gn1/web/webqtl/showTrait/ShowTraitPage.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
gn1/web/webqtl/showTrait/ShowTraitPage.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)" ,user_ip)
gn1/web/webqtl/textUI/cmdClass.py: query = """SELECT count(id) FROM AccessLog WHERE ip_address = %s AND UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400"""
-gn1/web/webqtl/textUI/cmdClass.py: query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)"""
+gn1/web/webqtl/textUI/cmdClass.py: query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)"""
gn2/wqflask/wqflask/show_trait/show_trait_page.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
gn2/wqflask/wqflask/show_trait/show_trait_page.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip)
When looking at the code in GN1 and GN2 it restricts the daily use of
the trait data page (set to 1,000 - whoever reaches that?). Unlike
mentioned in the schema description, this table does *not* keep track
-of cookies.
+of cookies.
From the code it looks like GN2 uses a mixture of Redis and sqlalchemy
to keep track of logged in sessions (see
-gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in
+gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in
model.py.
In gn2/wqflask/wqflask/templates/collections/view_anonymous.html it
show_trait_page appears to be loaded (need to check).
-** AvgMethod
+** AvgMethod
Probesetfreeze refers to AvgMethod
@@ -235,7 +235,7 @@ Hooked in API (URL encoding)
GN2 only (see menu bar)
-** Ensembl*
+** Ensembl*
Probe information
@@ -321,15 +321,15 @@ Indel Snp browser (variant browser Gn1)
** Info*
-Infra system PhP
+Infra system PhP
-Data Info button
+Data Info button
Infosystem users has separate entries
-Also Investigators, User, Organizations,
+Also Investigators, User, Organizations,
-** LCorrRamin3
+** LCorrRamin3
Lit. Correlations Prof. Ramin
@@ -376,7 +376,7 @@ This table contains names, full descriptions, and short symbols for
traits and phenotype used primarily in the Published Phenotypes
databases.
-Contains 10k rows, March 2016, of which 5000 are for the BXDs).
+Contains 10k rows, March 2016, of which 5000 are for the BXDs).
| Id | Pre_publication_description | Post_publication_description | Original_description | Units | Pre_publication_abbreviation | Post_publication_abbreviation | Lab_code | Submitter | Owner | Authorized_Users |
+----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+
@@ -493,7 +493,7 @@ select count(*) from ProbeSet limit 5;
** ProbeSetData
-Probedata - main molecular data. Probesets, metabolome,
+Probedata - main molecular data. Probesets, metabolome,
Almost all important molecular assay data is in this table including
probe set data, RNA-seq data, proteomic data, and metabolomic
@@ -599,7 +599,7 @@ select * from ProbeSetFreeze limit 5;
Note that the following unlimited search is very slow:
-select max(value) from ProbeSetData;
+select max(value) from ProbeSetData;
+------------+
| max(value) |
@@ -617,12 +617,12 @@ Suggestions on the schema page:
"StrainId" should be "CaseId" or "SampleId".
-"ProbeSetData" should probably be "AssayData" or something more neutral.
+"ProbeSetData" should probably be "AssayData" or something more neutral.
*** Comments
I think the ProbeSetData table should be generalized to a 'phenotypes'
-table with an 'sample_id' column and a 'value' column.
+table with an 'sample_id' column and a 'value' column.
A new table 'samples' will link each sample against an 'experiment',
an 'individual' and which in turn can link to a 'strain'.
@@ -697,7 +697,7 @@ select * from Species;
+----+-----------+----------------------+----------------+----------------------+-------------------------+------------+---------+
-** InbredSet
+** InbredSet
Menu
@@ -723,10 +723,128 @@ User selection - retained
** UserPrivilege
-** Vlookup
+** Vlookup
* Fetching Data
+** Menu
+
+What happens at the main search menu can be emulated with curl.
+
+*** Search Mouse mRNA from HC_M2_0606_P dataset
+
+: curl "http://localhost:5003/search?species=mouse&group=BXD&ppocampus+mRNA&dataset=HC_M2_0606_P&search_terms_or=&search_terms_and=MEAN%3D%2815+16%29+LRS%3D%2823+46%29+&FormID=searchResult"
+
+the first SQL query checks whether a dataset is confidential or not:
+
+INFO:base.data_set:.sql: geno_mrna_confidentiality: u'SELECT Id, Name,
+FullName, confidentiality,AuthorisedUsers FROM ProbeSetFreeze WHERE
+Name = "HC_M2_0606_P"'
+
+The following is executed once per search:
+
+INFO:db.call:.sql: retrieve_other_names:
+ SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName, ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, Tissue.Name
+ FROM ProbeSetFreeze, ProbeFreeze, Tissue
+ WHERE ProbeSetFreeze.public > 0 AND
+ ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
+ ProbeFreeze.TissueId = Tissue.Id AND
+ (ProbeSetFreeze.Name = 'HC_M2_0606_P' OR ProbeSetFreeze.FullName = 'HC_M2_0606_P' OR ProbeSetFreeze.ShortName = 'HC_M2_0606_P')
+
+INFO:db.call:.sql: (112, 'HC_M2_0606_P', 'Hippocampus Consortium M430v2 (Jun06) PDNN', 'Hippocampus M430v2 BXD 06/06 PDNN', 'log2', 'Hippocampus mRNA')
+
+INFO:db.call:.sql: __init__:
+ SELECT
+ InbredSet.Name, InbredSet.Id
+ FROM
+ InbredSet, ProbeSetFreeze, ProbeFreeze
+ WHERE
+ ProbeFreeze.InbredSetId = InbredSet.Id AND
+ ProbeFreeze.Id = ProbeSetFreeze.ProbeFreezeId AND
+ ProbeSetFreeze.Name = "HC_M2_0606_P"
+
+INFO:db.call:.sql: ('BXD', 1)
+
+INFO:base.species:.sql: __init__:
+ Select
+ Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
+ where
+ Chr_Length.SpeciesId = InbredSet.SpeciesId AND
+ InbredSet.Name = 'BXD'
+ Order by OrderId
+
+And the following are executed for every trait:
+
+INFO:wqflask.do_search:.sql: execute: SELECT distinct ProbeSet.Name as TNAME, 0 as thistable, ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS, ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM, ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL, ProbeSet.name_num as TNAME_NUM FROM ProbeSetXRef, ProbeSet WHERE (( ProbeSetXRef.mean > 15.0 and ProbeSetXRef.mean < 16.0 )AND( ProbeSetXRef.LRS > 23.0 and ProbeSetXRef.LRS < 46.0 )) and ProbeSet.Id = ProbeSetXRef.ProbeSetId and ProbeSetXRef.ProbeSetFreezeId = 112 ORDER BY ProbeSet.symbol ASC
+
+INFO:base.trait:.sql: retrieve_info: SELECT ProbeSet.name,
+ ProbeSet.symbol, ProbeSet.description,
+ ProbeSet.probe_target_description, ProbeSet.chr,
+ ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid,
+ ProbeSet.genbankid, ProbeSet.unigeneid,
+ ProbeSet.omim, ProbeSet.refseq_transcriptid,
+ ProbeSet.blatseq, ProbeSet.targetseq,
+ ProbeSet.chipid, ProbeSet.comments,
+ ProbeSet.strand_probe, ProbeSet.strand_gene,
+ ProbeSet.probe_set_target_region,
+ ProbeSet.probe_set_specificity,
+ ProbeSet.probe_set_blat_score,
+ ProbeSet.probe_set_blat_mb_start,
+ ProbeSet.probe_set_blat_mb_end,
+ ProbeSet.probe_set_strand,
+ ProbeSet.probe_set_note_by_rw, ProbeSet.flag
+
+ FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+ WHERE
+ ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetFreeze.Name = 'HC_M2_0606_P' AND
+ ProbeSet.Name = '1443823_s_at'
+
+INFO:base.trait:.sql: retrieve_info:
+ SELECT
+ HomologeneId
+ FROM
+ Homologene, Species, InbredSet
+ WHERE
+ Homologene.GeneId =98660 AND
+ InbredSet.Name = 'BXD' AND
+ InbredSet.SpeciesId = Species.Id AND
+ Species.TaxonomyId = Homologene.TaxonomyId
+
+INFO:base.trait:.sql: retrieve_info:
+ SELECT
+ ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
+ FROM
+ ProbeSetXRef, ProbeSet
+ WHERE
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSet.Name = "1443823_s_at" AND
+ ProbeSetXRef.ProbeSetFreezeId =112
+
+INFO:base.trait:.sql: retrieve_info:
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = 'mouse' and
+ Geno.Name = 'NES13033186' and
+ Geno.SpeciesId = Species.Id
+
+
+and finally mean and chromosome positions are fetched a second time
+for every trait with
+
+INFO:base.data_set:.sql: get_trait_info: select ProbeSetXRef.mean from ProbeSetXRef, ProbeSet
+ where ProbeSetXRef.ProbeSetFreezeId = 112 and
+ ProbeSet.Id = ProbeSetXRef.ProbeSetId and
+ ProbeSet.Name = '1443823_s_at'
+
+INFO:base.data_set:.sql: get_trait_info:
+ select Geno.Chr, Geno.Mb from Geno, Species
+ where Species.Name = 'mouse' and
+ Geno.Name = 'NES13033186' and
+ Geno.SpeciesId = Species.Id
+
+(that is a bug!).
+
** Fetch phenotypes
To get at phenotype data ProbeSetData is the main table (almost all
diff --git a/etc/default_settings.py b/etc/default_settings.py
index 29bc2133..d4eb5580 100644
--- a/etc/default_settings.py
+++ b/etc/default_settings.py
@@ -10,6 +10,9 @@
#
# Note also that in the near future we will additionally fetch
# settings from a JSON file
+#
+# Note that values for False and 0 have to be strings here - otherwise
+# Flask won't pick them up
import os
import sys
@@ -38,7 +41,7 @@ WEBSERVER_BRANDING = None # Set the branding (nyi)
WEBSERVER_DEPLOY = None # Deployment specifics (nyi)
LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL)
-LOG_LEVEL_DEBUG = 1 # Debug log level (0-5) (nyi)
+LOG_LEVEL_DEBUG = '0' # Debug log level (0-5, 0 = show all)
LOG_SQL = 'False' # Log SQL/backend and GN_SERVER calls
LOG_SQLALCHEMY = 'False'
LOG_BENCH = True # Log bench marks
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index ffc5917a..c70738f7 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -711,12 +711,14 @@ class PhenotypeDataSet(DataSet):
this_trait.LRS_location_value = 1000000
if this_trait.lrs:
- result = g.db.execute("""
+ query = """
select Geno.Chr, Geno.Mb from Geno, Species
where Species.Name = %s and
Geno.Name = %s and
Geno.SpeciesId = Species.Id
- """, (species, this_trait.locus)).fetchone()
+ """ % (species, this_trait.locus)
+ logger.sql(query)
+ result = g.db.execute(query).fetchone()
if result:
if result[0] and result[1]:
@@ -1172,9 +1174,9 @@ def geno_mrna_confidentiality(ob):
#logger.debug("dataset_table [%s]: %s" % (type(dataset_table), dataset_table))
query = '''SELECT Id, Name, FullName, confidentiality,
- AuthorisedUsers FROM %s WHERE Name = %%s''' % (dataset_table)
+ AuthorisedUsers FROM %s WHERE Name = "%s"''' % (dataset_table,ob.name)
logger.sql(query)
- result = g.db.execute(query, ob.name)
+ result = g.db.execute(query)
(dataset_id,
name,
diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py
index c62ea2fe..86ee1c52 100644
--- a/wqflask/utility/logger.py
+++ b/wqflask/utility/logger.py
@@ -32,7 +32,7 @@ from inspect import isfunction
from pprint import pformat as pf
from inspect import stack
-from utility.tools import LOG_LEVEL, LOG_SQL, LOG_FORMAT
+from utility.tools import LOG_LEVEL, LOG_LEVEL_DEBUG, LOG_SQL, LOG_FORMAT
class GNLogger:
"""A logger class with some additional functionality, such as
@@ -48,9 +48,13 @@ class GNLogger:
"""Set the undelying log level"""
self.logger.setLevel(value)
- def debug(self,*args):
- """Call logging.debug for multiple args"""
- self.collect(self.logger.debug,*args)
+ def debug(self,level=0,*args):
+ """Call logging.debug for multiple args. Use level=num to filter on
+LOG_LEVEL_DEBUG.
+
+ """
+ if level <= LOG_LEVEL_DEBUG:
+ self.collect(self.logger.debug,*args)
def info(self,*args):
"""Call logging.info for multiple args"""
@@ -71,16 +75,20 @@ class GNLogger:
if self.logger.getEffectiveLevel() < 30:
self.collectf(self.logger.debug,*args)
- def debugf(self,*args):
+ def debugf(self,level=0,*args):
"""Call logging.debug for multiple args lazily"""
# only evaluate function when logging
- if self.logger.getEffectiveLevel() < 20:
- self.collectf(self.logger.debug,*args)
+ if level <= LOG_LEVEL_DEBUG:
+ if self.logger.getEffectiveLevel() < 20:
+ self.collectf(self.logger.debug,*args)
def sql(self, sqlcommand, fun = None):
"""Log SQL command, optionally invoking a timed fun"""
if LOG_SQL:
- self.info(stack()[1][3],sqlcommand)
+ caller = stack()[1][3]
+ if caller in ['fetchone','fetch1','fetchall']:
+ caller = stack()[2][3]
+ self.info(caller,sqlcommand)
if fun:
result = fun(sqlcommand)
if LOG_SQL:
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index 93b1fa51..bb8241f5 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -43,12 +43,13 @@ def get_setting(command_id,guess=None):
# ---- Check whether environment exists
logger.debug("Looking for "+command_id+"\n")
command = value(os.environ.get(command_id))
- if command == None or command == "":
+ if command is None or command == "":
# ---- Check whether setting exists in app
command = value(app.config.get(command_id))
- if command == None:
+ if command is None:
command = value(guess)
- if command == None or command == "":
+ if command is None or command == "":
+ print command
raise Exception(command_id+' setting unknown or faulty (update default_settings.py?).')
logger.debug("Set "+command_id+"="+str(command))
return command
@@ -61,7 +62,11 @@ def get_setting_bool(id):
def get_setting_int(id):
v = get_setting(id)
- return int(v)
+ if isinstance(v, str):
+ return int(v)
+ if v is None:
+ return 0
+ return v
def valid_bin(bin):
if os.path.islink(bin) or valid_file(bin):