aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorzsloan2018-01-24 20:14:06 +0000
committerzsloan2018-01-24 20:14:06 +0000
commit4634cf87761980a1682473211259698b7be720e4 (patch)
tree7168293e7cf674a4250ea84080bd83112552ae93
parent623cdd2cfafc3d2dd45bc1adf460c9f3c5120e7a (diff)
parent56065c9f994c9247eaa17e5216d44d0e5e733aa2 (diff)
downloadgenenetwork2-4634cf87761980a1682473211259698b7be720e4.tar.gz
Merge branch 'testing' of github.com:genenetwork/genenetwork2 into testing
l---------[-rw-r--r--]VERSION2
-rwxr-xr-xbin/genenetwork29
-rw-r--r--doc/README.org26
-rw-r--r--doc/database.org165
-rw-r--r--etc/VERSION2
-rw-r--r--etc/default_settings.py11
-rw-r--r--wqflask/base/data_set.py2
-rw-r--r--wqflask/runserver.py12
-rw-r--r--wqflask/utility/logger.py2
-rw-r--r--wqflask/utility/tools.py30
10 files changed, 236 insertions, 25 deletions
diff --git a/VERSION b/VERSION
index 3e0b7cab..a9a7884c 100644..120000
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.10-pre4
+etc/VERSION \ No newline at end of file
diff --git a/bin/genenetwork2 b/bin/genenetwork2
index 5e791885..a7edb1c2 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -1,5 +1,11 @@
#! /bin/bash
#
+# Typical usage
+#
+# env GN2_PROFILE=~/opt/genenetwork2-phewas ./bin/genenetwork2
+#
+# Where GN2_PROFILE points to the GNU Guix profile used for deployment.
+#
# This will run the GN2 server (with default settings if none
# supplied). Typically you need a GNU Guix profile which is set with
# an environment variable (this profile is dictated by the
@@ -120,6 +126,9 @@ echo -n "dir $TMPDIR
dbfilename gn2.rdb
" | redis-server - &
+# Overrides for packages that are not yet public (currently r-auwerx)
+export R_LIBS_SITE=$R_LIBS_SITE:$HOME/.Rlibs/das1i1pm54dj6lbdcsw5w0sdwhccyj1a-r-3.3.2/lib/R/lib
+
# Start the flask server running GN2
cd $GN2_BASE_DIR/wqflask
echo "Starting with $settings"
diff --git a/doc/README.org b/doc/README.org
index a39ef603..937a9549 100644
--- a/doc/README.org
+++ b/doc/README.org
@@ -104,11 +104,29 @@ As root configure and run
: mysqld --datadir=/var/mysql --initialize-insecure
: mkdir -p /var/run/mysqld
: chown mysql.mysql ~/mysql /var/run/mysqld
-: su mysql -c mysqld --datadir=/var/mysql --explicit_defaults_for_timestamp -P 12048
+: mysqld -u mysql --datadir=/var/mysql --explicit_defaults_for_timestamp -P 12048
-/etc/my.cnf
-[mysqld]
-user=root
+If you want to run as root you may have to set
+
+: /etc/my.cnf
+: [mysqld]
+: user=root
+
+To check error output in a file on start-up run with something like
+
+: mysqld -u mysql --console --explicit_defaults_for_timestamp --datadir=/gnu/mysql --log-error=~/test.log
+
+Another tip: Guix installs mysqld in your profile, so this may work
+
+: /home/user/.guix-profile/bin/mysqld -u mysql --explicit_defaults_for_timestamp --datadir=/gnu/mysql
+
+When you get errors like:
+
+: sqlalchemy.exc.IntegrityError: (_mysql_exceptions.IntegrityError) (1215, 'Cannot add foreign key constraint')
+
+you may need to set
+
+: set foreign_key_checks=0
** Load the small database in MySQL
diff --git a/doc/database.org b/doc/database.org
index 624174a4..5107b660 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -1,9 +1,19 @@
-- github Document reduction issue
+* Database Information
+
+WARNING: This document contains information on the GN databases which
+will change over time. The GN database is currently MySQL based and,
+while efficient, contains a number of design choices we want to grow
+'out' of. Especially with an eye on reproducibility we want to
+introduce versioning.
+
+So do not treat the information in this document as a final way of
+accessing data. It is better to use the
+[[https://github.com/genenetwork/gn_server/blob/master/doc/API.md][REST API]].
* The small test database (2GB)
The default install comes with a smaller database which includes a
-number of the BSD's and the Human liver dataset (GSE9588).
+number of the BXD's and the Human liver dataset (GSE9588).
* GeneNetwork database
@@ -750,9 +760,30 @@ show indexes from ProbeSetFreeze;
| 1 | 5 | 0.303492 |
+--------+----------+----------+
-** Publication and publishdata (all pheno)
+** Publication
+
+Publication:
+
+| Id | PubMed_ID | Abstract | Title | Pages | Month | Year |
+
-Phenotype pubs
+** Publishdata (all pheno)
+
+One of three phenotype tables.
+
+mysql> select * from PublishData limit 5;
++---------+----------+-------+
+| Id | StrainId | value |
++---------+----------+-------+
+| 8966353 | 349 | 29.6 |
+| 8966353 | 350 | 27.8 |
+| 8966353 | 351 | 26.6 |
+| 8966353 | 352 | 28.5 |
+| 8966353 | 353 | 24.6 |
++---------+----------+-------+
+5 rows in set (0.25 sec)
+
+See below for phenotype access.
** QuickSearch
@@ -1073,7 +1104,37 @@ select * from ProbeSetXRef limit 5;
i.e., for Strain Id 1 (DataId) 1, the locus '10.095.400' has a
phenotype value of 5.742.
-GeneNetwork1 already has a limited REST interface, if you do
+Interestingly ProbeData and PublishData have the same layout as
+ProbeSetData. ProbeData is only in use for Affy assays - and not used
+for computations. PublishData contains trait values. ProbeSetData.id
+matches ProbeSetXRef.DataId while PublishData.id matches
+PublishXRef.DataId.
+
+select * from PublishXRef limit 3;
++-------+-------------+-------------+---------------+---------+----------------+------------------+-----------+----------+-------------------------------------------------------+
+| Id | InbredSetId | PhenotypeId | PublicationId | DataId | Locus | LRS | additive | Sequence | comments |
++-------+-------------+-------------+---------------+---------+----------------+------------------+-----------+----------+-------------------------------------------------------+
+| 10001 | 8 | 1 | 1 | 8966353 | D2Mit5 | 10.18351644706 | -1.20875 | 1 | |
+| 10001 | 7 | 2 | 53 | 8966813 | D7Mit25UT | 9.85534330983917 | -2.86875 | 1 | |
+| 10001 | 4 | 3 | 81 | 8966947 | CEL-6_57082524 | 11.7119505898121 | -23.28875 | 1 | elissa modified Abstract at Tue Jun 7 11:38:00 2005 |
++-------+-------------+-------------+---------------+---------+----------------+------------------+-----------+----------+-------------------------------------------------------+
+3 rows in set (0.00 sec)
+
+PublishXRef ties the trait data (PublishData) to the InbredSetId (matching
+PublishFreeze.InbredSetId), locus and publication.
+
+select * from PublishFreeze;
++----+------------+--------------------------+-------------+------------+--------+-------------+-----------------+-----------------+
+| Id | Name | FullName | ShortName | CreateTime | public | InbredSetId | confidentiality | AuthorisedUsers |
++----+------------+--------------------------+-------------+------------+--------+-------------+-----------------+-----------------+
+| 1 | BXDPublish | BXD Published Phenotypes | BXDPublish | 2004-07-17 | 2 | 1 | 0 | NULL |
+| 18 | HLCPublish | HLC Published Phenotypes | HLC Publish | 2012-02-20 | 2 | 34 | 0 | NULL |
++----+------------+--------------------------+-------------+------------+--------+-------------+-----------------+-----------------+
+2 rows in set (0.02 sec)
+
+which gives us the datasets.
+
+GeneNetwork1 has a limited REST interface, if you do
: curl "http://robot.genenetwork.org/webqtl/main.py?cmd=get&probeset=1443823_s_at&db=HC_M2_0606_P"
@@ -1082,6 +1143,9 @@ we get
: ProbeSetID B6D2F1 C57BL/6J DBA/2J BXD1 BXD2 BXD5 BXD6 BXD8 BXD9 BXD11 BXD12 BXD13 BXD15 BXD16 BXD19 BXD20 BXD21 BXD22 BXD23 BXD24 BXD27 BXD28 BXD29 BXD31 BXD32 BXD33 BXD34 BXD38 BXD39 BXD40 BXD42 BXD67 BXD68 BXD43 BXD44 BXD45 BXD48 BXD50 BXD51 BXD55 BXD60 BXD61 BXD62 BXD63 BXD64 BXD65 BXD66 BXD69 BXD70 BXD73 BXD74 BXD75 BXD76 BXD77 BXD79 BXD73a BXD83 BXD84 BXD85 BXD86 BXD87 BXD89 BXD90 BXD65b BXD93 BXD94 A/J AKR/J C3H/HeJ C57BL/6ByJ CXB1 CXB2 CXB3 CXB4 CXB5 CXB6 CXB7 CXB8 CXB9 CXB10 CXB11 CXB12 CXB13 BXD48a 129S1/SvImJ BALB/cJ BALB/cByJ LG/J NOD/ShiLtJ PWD/PhJ BXD65a BXD98 BXD99 CAST/EiJ KK/HlJ WSB/EiJ NZO/HlLtJ PWK/PhJ D2B6F1
: 1443823_s_at 15.251 15.626 14.716 15.198 14.918 15.057 15.232 14.968 14.87 15.084 15.192 14.924 15.343 15.226 15.364 15.36 14.792 14.908 15.344 14.948 15.08 15.021 15.176 15.14 14.796 15.443 14.636 14.921 15.22 15.62 14.816 15.39 15.428 14.982 15.05 15.13 14.722 14.636 15.242 15.527 14.825 14.416 15.125 15.362 15.226 15.176 15.328 14.895 15.141 15.634 14.922 14.764 15.122 15.448 15.398 15.089 14.765 15.234 15.302 14.774 14.979 15.212 15.29 15.012 15.041 15.448 14.34 14.338 14.809 15.046 14.816 15.232 14.933 15.255 15.21 14.766 14.8 15.506 15.749 15.274 15.599 15.673 14.651 14.692 14.552 14.563 14.164 14.546 15.044 14.695 15.162 14.772 14.645 15.493 14.75 14.786 15.003 15.148 15.221
+(see https://github.com/genenetwork/gn_server/blob/master/doc/API.md
+for the latest REST API).
+
getTraitData is defined in the file [[https://github.com/genenetwork/genenetwork/blob/master/web/webqtl/textUI/cmdClass.py#L134][web/webqtl/textUI/cmdClass.py]].
probe is None, so the code at line 199 is run
@@ -1165,6 +1229,97 @@ select * from ProbeSetData limit 5;
5 rows in set (0.00 sec)
linked by ProbeSetXRef.dataid.
+
+*** For PublishData:
+
+List datasets for BXD (InbredSetId=1):
+
+select * from PublishXRef where InbredSetId=1 limit 3;
++-------+-------------+-------------+---------------+---------+-----------+------------------+------------------+----------+--------------------------------------------------------------------------------+
+| Id | InbredSetId | PhenotypeId | PublicationId | DataId | Locus | LRS | additive | Sequence | comments |
++-------+-------------+-------------+---------------+---------+-----------+------------------+------------------+----------+--------------------------------------------------------------------------------+
+| 10001 | 1 | 4 | 116 | 8967043 | rs8253516 | 13.4974914158039 | 2.39444444444444 | 1 | robwilliams modified post_publication_description at Mon Jul 30 14:58:10 2012
+ |
+| 10002 | 1 | 10 | 116 | 8967044 | rs3666069 | 22.0042692151629 | 2.08178571428572 | 1 | robwilliams modified phenotype at Thu Oct 28 21:43:28 2010
+ |
+| 10003 | 1 | 15 | 116 | 8967045 | D18Mit4 | 15.5929163293343 | 19.0882352941176 | 1 | robwilliams modified phenotype at Mon May 23 20:52:19 2011
+ |
++-------+-------------+-------------+---------------+---------+-----------+------------------+------------------+----------+--------------------------------------------------------------------------------+
+
+where ID is the 'record' or, effectively, dataset.
+
+select distinct(publicationid) from PublishXRef where InbredSetId=1 limit 3;
++---------------+
+| publicationid |
++---------------+
+| 116 |
+| 117 |
+| 118 |
++---------------+
+
+select distinct
+PublishXRef.id,publicationid,phenotypeid,Phenotype.post_publication_description
+from PublishXRef,Phenotype where InbredSetId=1 and
+phenotypeid=Phenotype.id limit 3;
++-------+---------------+-------------+----------------------------------------------------------------------------------------------------------------------------+
+| id | publicationid | phenotypeid | post_publication_description |
++-------+---------------+-------------+----------------------------------------------------------------------------------------------------------------------------+
+| 10001 | 116 | 4 | Central nervous system, morphology: Cerebellum weight [mg] |
+| 10002 | 116 | 10 | Central nervous system, morphology: Cerebellum weight after adjustment for covariance with brain size [mg] |
+| 10003 | 116 | 15 | Central nervous system, morphology: Brain weight, male and female adult average, unadjusted for body weight, age, sex [mg] |
++-------+---------------+-------------+----------------------------------------------------------------------------------------------------------------------------+
+
+The id field is the same one that is used in the GN2 web interface and the
+publicationid ties the datasets together.
+
+To list trait values:
+
+SELECT Strain.Name, PublishData.id, PublishData.value from
+(Strain,PublishData, PublishXRef) Where PublishData.StrainId =
+Strain.id limit 3;
+
++------+---------+-------+
+| Name | id | value |
++------+---------+-------+
+| CXB1 | 8966353 | 29.6 |
+| CXB1 | 8966353 | 29.6 |
+| CXB1 | 8966353 | 29.6 |
++------+---------+-------+
+
+here id should match dataid again:
+
+SELECT Strain.Name, PublishData.id, PublishData.value from
+(Strain,PublishData, PublishXRef) Where PublishData.StrainId =
+Strain.id and PublishXRef.dataid=8967043 and
+PublishXRef.dataid=PublishData.id limit 3;
++------+---------+-------+
+| Name | id | value |
++------+---------+-------+
+| BXD1 | 8967043 | 61.4 |
+| BXD2 | 8967043 | 49 |
+| BXD5 | 8967043 | 62.5 |
++------+---------+-------+
+
+*** Datasets
+
+The REST API aims to present a unified interface for genotype and
+phenotype data. Phenotype datasets appear in two major forms in the
+database and we want to present them as one resource.
+
+Dataset names are defined in ProbeSetFreeze.name and Published.id ->
+publication (we'll ignore the probe dataset that uses
+ProbeFreeze.name). These tables should be meshed. It looks like the
+ids are non-overlapping with the publish record IDs starting at 10,001
+(someone has been smart, though it sets the limit of probesets now to
+10,000).
+
+The datasets are organized differently in these tables. All published
+BXD data is grouped on BXDPublish with the publications as
+'datasets'. So, that is how we list them in the REST API.
+
+To fetch all the datasets we first list ProbeSetFreeze entries. Then
+we list the Published entries.
+
** Fetch genotype information
*** SNPs
diff --git a/etc/VERSION b/etc/VERSION
index 1785aa28..b624c74a 100644
--- a/etc/VERSION
+++ b/etc/VERSION
@@ -1 +1 @@
-2.10rc3
+2.10rc5
diff --git a/etc/default_settings.py b/etc/default_settings.py
index c00f6c8f..59e22f1a 100644
--- a/etc/default_settings.py
+++ b/etc/default_settings.py
@@ -2,7 +2,7 @@
# webserver running in developer mode with limited console
# output. Copy this file and run it from ./bin/genenetwork2 configfile
#
-# Note that these settings are fetched in ./wqflask/utilities/tools.py
+# Note: these settings are fetched in ./wqflask/utility/tools.py
# which has support for overriding them through environment variables,
# e.g.
#
@@ -14,8 +14,12 @@
# Note also that in the near future we will additionally fetch
# settings from a JSON file
#
-# Note that values for False and 0 have to be strings here - otherwise
+# Note: values for False and 0 have to be strings here - otherwise
# Flask won't pick them up
+#
+# For GNU Guix deployment also check the paths in
+#
+# ~/.guix-profile/lib/python2.7/site-packages/genenetwork2-2.0-py2.7.egg/etc/default_settings.py
import os
import sys
@@ -34,7 +38,7 @@ SECURITY_RECOVERABLE = True
SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org"
SECURITY_POST_LOGIN_VIEW = "/thank_you"
-SERVER_PORT = 5003
+SERVER_PORT = 5003 # running on localhost
SECRET_HMAC_CODE = '\x08\xdf\xfa\x93N\x80\xd9\\H@\\\x9f`\x98d^\xb4a;\xc6OM\x946a\xbc\xfc\x80:*\xebc'
# ---- Behavioural settings (defaults) note that logger and log levels can
@@ -42,6 +46,7 @@ SECRET_HMAC_CODE = '\x08\xdf\xfa\x93N\x80\xd9\\H@\\\x9f`\x98d^\xb4a;\xc6OM\x946a
WEBSERVER_MODE = 'DEV' # Python webserver mode (DEBUG|DEV|PROD)
WEBSERVER_BRANDING = None # Set the branding (nyi)
WEBSERVER_DEPLOY = None # Deployment specifics (nyi)
+WEBSERVER_URL = "http://localhost:"+str(SERVER_PORT)+"/" # external URL
LOG_LEVEL = 'WARNING' # Logger mode (DEBUG|INFO|WARNING|ERROR|CRITICAL)
LOG_LEVEL_DEBUG = '0' # logger.debugf log level (0-5, 5 = show all)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 6649f8af..a4eaaa2e 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -332,7 +332,7 @@ class DatasetGroup(object):
if check_plink_gemma():
marker_class = HumanMarkers
else:
- marker_class = Markers
+ marker_class = Markers
if self.genofile:
self.markers = marker_class(self.genofile[:-5])
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 50805643..50f134db 100644
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -22,11 +22,19 @@ ENDC = '\033[0m'
import os
app.config['SECRET_KEY'] = os.urandom(24)
-from utility.tools import WEBSERVER_MODE,get_setting_int
+from utility.tools import WEBSERVER_MODE,get_setting_int,get_setting,get_setting_bool
port = get_setting_int("SERVER_PORT")
-logger.info("GN2 is running. Visit %shttp://localhost:%s/%s" % (BLUE,port,ENDC))
+print("GN2 API server URL is ["+BLUE+get_setting("GN_SERVER_URL")+ENDC+"]")
+
+if get_setting_bool("USE_GN_SERVER"):
+ import requests
+ page = requests.get(get_setting("GN_SERVER_URL"))
+ if page.status_code != 200:
+ raise Exception("API server not found!")
+
+print("GN2 is running. Visit %s[http://localhost:%s/%s](%s)" % (BLUE,str(port),ENDC,get_setting("WEBSERVER_URL")))
werkzeug_logger = logging.getLogger('werkzeug')
diff --git a/wqflask/utility/logger.py b/wqflask/utility/logger.py
index bacb0aa4..128706df 100644
--- a/wqflask/utility/logger.py
+++ b/wqflask/utility/logger.py
@@ -72,7 +72,7 @@ LOG_LEVEL_DEBUG (NYI).
def warning(self,*args):
"""Call logging.warning for multiple args"""
self.collect(self.logger.warning,*args)
- self.logger.warning(self.collect(*args))
+ # self.logger.warning(self.collect(*args))
def error(self,*args):
"""Call logging.error for multiple args"""
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index c5685cdd..57f97a81 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -105,7 +105,7 @@ def js_path(module=None):
try_guix = get_setting("JS_GUIX_PATH")+"/"+module
if valid_path(try_guix):
return try_guix
- raise "No JS path found for "+module+" (check JS_GN_PATH)"
+ raise "No JS path found for "+module+" (if not in Guix check JS_GN_PATH)"
def pylmm_command(guess=None):
return assert_bin(get_setting("PYLMM_COMMAND",guess))
@@ -147,9 +147,14 @@ def assert_writable_dir(dir):
fh.close()
os.remove(fn)
except IOError:
- raise Exception('Unable to write test.txt to directory ' + dir )
+ raise Exception('Unable to write test.txt to directory ' + dir)
return dir
+def assert_file(fn):
+ if not valid_file(fn):
+ raise Exception('Unable to find file '+fn)
+ return fn
+
def mk_dir(dir):
if not valid_path(dir):
os.makedirs(dir)
@@ -174,6 +179,9 @@ def locate(name, subdir=None):
if subdir: sys.stderr.write(subdir)
raise Exception("Can not locate "+name+" in "+base)
+def locate_phewas(name, subdir=None):
+ return locate(name,'/phewas/'+subdir)
+
def locate_ignore_error(name, subdir=None):
"""
Locate a static flat file in the GENENETWORK_FILES environment.
@@ -239,15 +247,16 @@ USE_GN_SERVER = get_setting_bool('USE_GN_SERVER')
GENENETWORK_FILES = get_setting('GENENETWORK_FILES')
JS_GUIX_PATH = get_setting('JS_GUIX_PATH')
-# assert_dir(JS_GUIX_PATH) - don't enforce right now
+assert_dir(JS_GUIX_PATH)
JS_GN_PATH = get_setting('JS_GN_PATH')
# assert_dir(JS_GN_PATH)
-PYLMM_COMMAND = pylmm_command()
-GEMMA_COMMAND = gemma_command()
+PYLMM_COMMAND = app_set("PYLMM_COMMAND",pylmm_command())
+GEMMA_COMMAND = app_set("GEMMA_COMMAND",gemma_command())
+PLINK_COMMAND = app_set("PLINK_COMMAND",plink_command())
GEMMA_WRAPPER_COMMAND = gemma_wrapper_command()
-PLINK_COMMAND = plink_command()
-TEMPDIR = tempdir() # defaults to UNIX TMPDIR
+TEMPDIR = tempdir() # defaults to UNIX TMPDIR
+assert_dir(TEMPDIR)
# ---- Handle specific JS modules
JS_TWITTER_POST_FETCHER_PATH = get_setting("JS_TWITTER_POST_FETCHER_PATH",js_path("Twitter-Post-Fetcher"))
@@ -267,3 +276,10 @@ if os.environ.get('WQFLASK_OVERRIDES'):
else:
OVERRIDES[k] = cmd
logger.debug(OVERRIDES)
+
+# assert_file(PHEWAS_FILES+"/auwerx/PheWAS_pval_EMMA_norm.RData")
+# assert_dir(get_setting("JS_BIODALLIANCE"))
+# assert_file(get_setting("JS_BIODALLIANCE")+"/build/dalliance-all.js")
+# assert_file(get_setting("JS_BIODALLIANCE")+"/build/worker-all.js")
+# assert_dir(get_setting("JS_TWITTER_POST_FETCHER"))
+assert_file(JS_TWITTER_POST_FETCHER_PATH+"/js/twitterFetcher_min.js")