diff options
| author | zsloan | 2016-05-17 16:03:05 +0000 | 
|---|---|---|
| committer | zsloan | 2016-05-17 16:03:05 +0000 | 
| commit | 0d590051ce073a169295382e5adac34cc803e5ac (patch) | |
| tree | 76bea6f1f69288a498e9f13d20b638b942ab0a0c | |
| parent | f6cee1731772c3482b0afd4c77472d69e747c655 (diff) | |
| parent | 04afa563e6d53fe2a91ac2e6eb4af2f2fa5d5c3b (diff) | |
| download | genenetwork2-0d590051ce073a169295382e5adac34cc803e5ac.tar.gz | |
Merge branch 'staging' of github.com:genenetwork/genenetwork2
59 files changed, 2761 insertions, 826 deletions
| diff --git a/.gitignore b/.gitignore index 35be2d7b..4586a167 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,6 @@ *~ web/new_genotypes/HSNIH.json wqflask/secure_server.py -wqflask/base/webqtlConfig.py build/ dist/ *.egg-info/ diff --git a/INSTALL.md b/INSTALL.md deleted file mode 100644 index b8c9c977..00000000 --- a/INSTALL.md +++ /dev/null @@ -1,89 +0,0 @@ -# INSTALL Genenetwork2 (GN2) - -## Use a Docker image - -A Docker image can be generated from -[here](https://github.com/lomereiter/gn2-docker). - -## Fetch GN2 from github - -Clone the repository (currently ~800Mb) to local - - git clone git@github.com:genenetwork2/genenetwork2.git - -## Dependencies - -GN2 requires - -* python -* redis-server -* mysql-server - -## Required python modules - -Install the following python modules (it is probably wise to use a local -Python with environment for this) - -* Flask -* pyyaml -* redis -* qtlreaper -* numarray -* pp -* Flask-SQLAlchemy - -## Set up local file settings.py - -```python -LOGFILE = """/tmp/flask_gn_log""" - -#This is needed because Flask turns key errors into a -#400 bad request response with no exception/log -TRAP_BAD_REQUEST_ERRORS = True - -DB_URI = """mysql://gn2:password@localhost/db_webqtl""" -SQLALCHEMY_DATABASE_URI = 'mysql://gn2:password@localhost/db_webqtl' - -# http://pythonhosted.org/Flask-Security/configuration.html -SECURITY_CONFIRMABLE = True -SECURITY_TRACKABLE = True -SECURITY_REGISTERABLE = True -SECURITY_RECOVERABLE = True - -SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org" -SECURITY_POST_LOGIN_VIEW = "/thank_you" -SQLALCHEMY_POOL_RECYCLE = 3600 - -SERVER_PORT = 5051 - -SECRET_HMAC_CODE = '*' -``` - -```sh -# Use a working copy of python -export python=$HOME/ve27/bin/python -export WQFLASK_SETTINGS=$HOME/settings.py -source /home/pjotr/ve27/bin/activate -cd genenetwork2/wqflask -$python ./runserver.py - -or - -$python ./secure_server.py -``` - -## Running tools - -### pylmm - -To run pylmm check out the repository at https://github.com/genenetwork/pylmm_gn2. - -Next update the setting.py file to point at the tree - -GN2 can locate PYLMM through PYLMM_PATH in setting.py (or in ENV) - - PYLMM_PATH = '/home/test/opensource/python/pylmm_gn2' - -## Other information - -Check also the ./misc/ directory for settings diff --git a/MANIFEST.in b/MANIFEST.in index f4ea2316..bf23f9aa 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -17,10 +17,6 @@ include wqflask/wqflask/templates/new_security/* include wqflask/wqflask/correlation/* include wqflask/wqflask/interval_mapping/* include wqflask/wqflask/interval_analyst/* -include wqflask/wqflask/my_pylmm/* -include wqflask/wqflask/my_pylmm/scripts/* -include wqflask/wqflask/my_pylmm/pyLMM/* -include wqflask/wqflask/my_pylmm/data/* include wqflask/wqflask/static/* include wqflask/wqflask/static/dbdoc/* include wqflask/wqflask/static/new/* @@ -260,3 +256,4 @@ include wqflask/flask_security/templates/security/email/* include wqflask/dbFunction/* include wqflask/basicStatistics/* include wqflask/base/* +include etc/*.py diff --git a/README-wqflask.txt b/README-wqflask.txt deleted file mode 100644 index 03e3a1cc..00000000 --- a/README-wqflask.txt +++ /dev/null @@ -1,51 +0,0 @@ -This readme concerns the directory wqflask - an officially sanctioned fork of the main GeneNetwork -code. It's still very early in the process - but we eventually want to port all of the code -in GeneNetwork to Flask and Jinja2. For more information about the project in general, see -the file README.md. - -For more information about the port to Flask, please keep reading. - -************************* - -Requirements: - -* Python 2.7 - -* virtualenv 1.7.1.2 or later - -* Other python dependencies are listed in the file wqflask/requirements.txt - -************************** - -Installation: - -We highly recommend you create a virtual enviornment called ve27 in your home directory. - - -Get into your home directory -> cd ~ - -Create a virtual environment -> virtualenv ve27 - -Activate the environment -> source ~/ve27/bin/activate - -Install dependencies -> pip install -r ~/gene/wqflask/requirements.txt -(Or replace gene with the name of the directory holding your repository) - -************************** - -Running the program: - -Assuming your enviornment is activated (source ~/ve27/bin/activate) just run: - -> python ~/gene/wqflask/runserver.py - -The program as configured runs on port 5000 and does not serve static files. - -You'll have to run a webserver to serve pages on port 80 and to server the static files (or -flask could also be configured to serve the static pages). - -A sample configuration file for nginx is in the directory: wqflask/other_config/nginx.conf diff --git a/README.md b/README.md index 3d95d05f..4fe739bf 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,27 @@ -genenetwork on github (May 7, 2012 by Lei Yan and Rob Williams) +# GENENETWORK -www.genenetwork.org +This repository contains the source code for the GeneNetwork server +http://www.genenetwork.org/ (version 2 aka GN2). -Released under Affero General Public License 3 (AGPLv3). See also -LICENSE.txt +## Install -For background see: http://en.wikipedia.org/wiki/Genenetwork +The recommended installation is with GNU Guix which allows you to +deploy GN2 and dependencies as a self contained unit on any machine. +The database can be run separately as well as the source tree (for +developers). See the [installation docs](doc/README.org). -WWW service initiated January, 1994 as The Portable Dictionary of the Mouse Genome and -June 15, 2001 as WebQTL. +## License -This code and the main web service is currently operated by Lei Yan, Zachary Sloan, -Arthur Centeno. Design and code by Xiaodong Zhou, Christian Fernandez, Sam Ockman, Ning Liu, Rudi Alberts, -Elissa Chesler, Jintao Wang, Kenneth Manly, Robert W. Williams, and colleagues. +The GeneNetwork2 source code is released under the Affero General +Public License 3 (AGPLv3). See [LICENSE.txt](LICENSE.txt). -Code and primary web service managed by Dr. Robert W. Williams and the University of Tennessee Health Science Center, -Memphis TN, USA. +## More information -Email labwilliams@gmail.com or rwilliams@uthsc.edu +For more information visit http://www.genenetwork.org/ -Older version available on SourceForge http://sourceforge.net/projects/genenetwork/ +## Contact -Funded by the National Institutes of Health and -University of Tennessee Center for Integrative and Translational Genomics +IRC on #genenetwork on irc.freenode.net. - -=========== +Code and primary web service managed by Dr. Robert W. Williams and the +University of Tennessee Health Science Center, Memphis TN, USA. diff --git a/bin/genenetwork2 b/bin/genenetwork2 new file mode 100755 index 00000000..bbb2a19f --- /dev/null +++ b/bin/genenetwork2 @@ -0,0 +1,44 @@ +#! /bin/bash +# +# This will run the GN2 server (with default settings if none supplied). +# +# Environment settings can be used to preconfigure as well as a +# settings.py file. + +# Absolute path to this script, e.g. /home/user/bin/foo.sh +SCRIPT=$(readlink -f "$0") +# Absolute path this script is in, thus /home/user/bin +GN2_BASE_PATH=$(dirname $(dirname "$SCRIPT")) + +GN2_GUIX_PATH=$GN2_BASE_PATH/lib/python2.7/site-packages/genenetwork2-2.0-py2.7.egg +if [ -d $GN2_GUIX_PATH ]; then + GN2_BASE_PATH=$GN2_GUIX_PATH +fi +echo $GN2_BASE_PATH + +# Handle settings parameter +settings=$1 +if [ -z $settings ]; then settings=$GN2_BASE_PATH/etc/default_settings.py ; fi +if [ ! -e $settings ]; then + echo "ERROR: can not locate settings file - pass it in the command line" + exit 1 +fi +export WQFLASK_SETTINGS=$settings + +# We may change this one: +export PYTHONPATH=$GN2_BASE_PATH/wqflask:$PYTHONPATH + +# TEMPDIR defaults to /tmp if nothing else +if [ -z $TEMPDIR ]; then + TEMPDIR="/tmp" +fi + +# Start the redis server +echo -n "dir $TEMPDIR +dbfilename gn2.rdb +" | redis-server - & + +# Start the flask server running GN2 +cd $GN2_BASE_PATH/wqflask +echo "Starting with $settings" +/usr/bin/env python runserver.py diff --git a/bin/test-website b/bin/test-website new file mode 100755 index 00000000..24274713 --- /dev/null +++ b/bin/test-website @@ -0,0 +1,66 @@ +#!/usr/bin/env ruby + + +USAGE = <<EOT +This is Mechanical-Rob - an automated web server tester for + Genenetwork.org that uses the brilliant + mechanize gem. + +To use this software you need to install mechanize. Run it from +the root of the genenetwork2 source tree as, for example, + + ./bin/test-website http://localhost:5003/ (default) + +For more information see http://genenetwork.org/ + +EOT +$stderr.print USAGE + +require 'optparse' + +options = {} +opts = OptionParser.new do |o| + o.banner = "Usage: #{File.basename($0)} [options] URL" + + o.on('-l','--link-checker', 'Check for dead links') do + options[:link_checker] = true + end + + o.separator "" + o.on_tail('-h', '--help', 'display this help and exit') do + options[:show_help] = true + end +end + +opts.parse!(ARGV) + +if options[:show_help] + print opts + exit 1 +end + +$host = + if ARGV.size>0 + ARGV.shift + else + "http://localhost:5003" + end + +$stderr.print "Testing <",$host,">\n" + +require 'mechanize' +require 'minitest/spec' +require 'minitest/autorun' + +# These are the actual testing modules + +libpath = File.dirname(File.dirname(__FILE__)) +$: << File.join(libpath,'test/lib') + +if options[:link_checker] + require 'link_checker' +else + require 'main_web_functionality' +end + + diff --git a/doc/GUIX-Reproducible-from-source.org b/doc/GUIX-Reproducible-from-source.org index b88eb9e8..4399ea26 100644 --- a/doc/GUIX-Reproducible-from-source.org +++ b/doc/GUIX-Reproducible-from-source.org @@ -4,6 +4,7 @@ - [[#introduction][Introduction]] - [[#binary-deployment][Binary deployment]] - [[#from-source-deployment][From source deployment]] + - [[#create-archive][Create archive]] * Introduction @@ -31,5 +32,23 @@ Next build guix (and run) following the instructions in [[https://github.com/pjo Once that is done we can add the guix-bioinformatics path with -: env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -A slurm +: env GUIX_PACKAGE_PATH=../guix-bioinformatics command +So + +#+begin_src sh :lang bash +#+begin_src sh :lang bash +gn-stable-guix$ env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -A genenetwork +genenetwork1 1.0-d622c803b out ../guix-bioinformatics/gn/packages/bioinformatics.scm:163:2 +genenetwork2 2.0-9e9475053 out ../guix-bioinformatics/gn/packages/bioinformatics.scm:215:2 +#+end_src sh :lang bash + +Install with + +#+begin_src sh :lang bash +gn-stable-guix$ env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -i genenetwork2 +#+end_src sh :lang bash + +* Create archive + +: env GUIX_PACKAGE_PATH=../../genenetwork/guix-bioinformatics/ ./pre-inst-env guix archive --export -r genenetwork2 > guix_gn2-2.0-9e9475053.nar diff --git a/doc/GUIX-archive.org b/doc/GUIX-archive.org new file mode 100644 index 00000000..67ab5cd0 --- /dev/null +++ b/doc/GUIX-archive.org @@ -0,0 +1,106 @@ +* Binary deployment + +Note binary deployment is not working pending a few improvements +to GNU Guix. See source deployment instead. + +** Install Guix using a tar ball + +GN can be deployed either as a binary tarball or as a GNU Guix +package. First install GNU Guix following the instructions of the +[[https://www.gnu.org/software/guix/manual/html_node/Binary-Installation.html#Binary-Installation][binary installation]] using a tar ball from [[https://www.gnu.org/software/guix/download/][here]]. + +With guix-daemon running you should be able to install the hello +package: + +: guix package -i hello + +** Fix locale + +You may want to + +#+begin_src sh :lang bash +export GUIX_LOCPATH=$HOME/.guix-profile/lib/locale +export LC_ALL=en_US.utf8 +#+end_src sh :lang bash + +** Authorize our archives + +Next add our archive key to guix (as root): + +#+begin_src scheme +echo "(public-key + (ecc + (curve Ed25519) + (q #E9A95686D8437186302E07C7AB9BF3913F435026C2D389AF27D9C66FD6EBB649#) + ) + ) +"|guix archive --authorize +#+end_src scheme + +if you have trouble finding a suitable guix try + +: ls /gnu/store/*guix-*/bin/guix + +and you should be able to use this directly, e.g. + +: alias guix=/gnu/store/632msbms2yaldfnlrb5lbnlnmn9yjisw-guix-0.9.0/bin/guix +: guix --version + +** Download and install the GN2 archive + +Find the archive on + + http://files.genenetwork.org/software/ + +download and install with + +#+begin_src bash +guix archive --import < genenetwork2-data-hash.nar +#+end_src bash + +and you should see a list of packages installing, e.g. + +#+begin_src bash +importing path `/gnu/store/l1zs2drn3zdzl5ysjcmhibcpa35p9zfc-python2-mysqlclient-1.3.7' +importing path `/gnu/store/n7kfg4knibvblggy8ci2liscl7vz5wkg-python2-parallel-1.6.4' +importing path `/gnu/store/qvv16qwlq59gp5d07lwbf5n8ndsi3il3-python2-sqlalchemy-1.0.11' +importing path `/gnu/store/qw872mbmr9ir0a9drv9xw9pvjk05ywwy-python2-xlsxwriter-0.8.4' +importing path `/gnu/store/wc112m1xfy3p08v14bdzay2ki2rirdsm-pylmm-gn2-1.0-3c6d1cac8' +importing path `/gnu/store/zfkcy17c2ks3cd9ks14irdabqvmlfpyn-python2-flask-sqlalchemy-2.1' +importing path `/gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff4' +(etc.) +#+end_src bash + +The following packages need to be added and the R path set + +: export R_LIBS_SITE="/home/wrk/.guix-profile/site-library/" +: guix package -i /gnu/store/w0dqg9dshq53j8xhcnqgvnvms2s6y5k5-r-wgcna-1.49-425bc170cc0873ddbd414675ac40f6d4d724c7cb +: guix package -i /gnu/store/k60bdlm0v7xic88j2z5c1jb1jvc371mn-r-qtl-1.38-4 + +You can add the last one to your profile + +: guix package -i /gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff +: export PATH=~/.guix-profile/bin:$PATH +: genenetwork2 + + or run it directly with + +: /gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff/bin/genenetwork2 + + + +** Other + +Update guix with a 'guix pull' and make guix visible in the path. +More information exists also in my [[https://github.com/pjotrp/guix-notes/blob/master/INSTALL.org][guix-notes]]. + +With guix running you should be able to install python, for example. + +: guix package -i python2 + +This will make python appear in $HOME/.guix-profile/bin/python. Suggested +environment settings can be seen with + +: guix package --search-paths + + diff --git a/doc/README.org b/doc/README.org index f6ab6a52..345341e1 100644 --- a/doc/README.org +++ b/doc/README.org @@ -1,28 +1,678 @@ -#+TITLE: Installing GeneNetwork services with GNU Guix + +#+TITLE: Installing GeneNetwork services * Table of Contents :TOC: - [[#introduction][Introduction]] - - [[#binary-deployment][Binary deployment]] - - [[#from-source-deployment][From source deployment]] + - [[#source-deployment][Source deployment]] + - [[#install-guix][Install guix]] + - [[#checkout-the-git-repositories][Checkout the git repositories]] + - [[#update-guix][Update guix]] + - [[#install-gn2][Install GN2]] + - [[#run-gn2][Run GN2]] + - [[#run-mysql-server][Run MySQL server]] + - [[#run-your-own-copy-of-gn2][Run your own copy of GN2]] + - [[#set-up-nginx-port-forwarding][Set up nginx port forwarding]] + - [[#source-deployment-and-other-information-on-reproducibility][Source deployment and other information on reproducibility]] + - [[#trouble-shooting][Trouble shooting]] + - [[#importerror-no-module-named-jinja2][ImportError: No module named jinja2]] + - [[#error-can-not-find-directory-homegn2_data][ERROR: can not find directory $HOME/gn2_data]] + - [[#cant-run-a-module][Can't run a module]] * Introduction -Large system deployments tend to get very complex. In this document we -explain the GeneNetwork deployment system which is based on GNU Guix -(see Pjotr's [[https://github.com/pjotrp/guix-notes/blob/master/README.md][Guix-notes]]). +Large system deployments can get very complex. In this document we +explain the GeneNetwork version 2 (GN2) reproducible deployment system +which is based on GNU Guix (see also Pjotr's [[https://github.com/pjotrp/guix-notes/blob/master/README.md][Guix-notes]]). The Guix +system can be used to install GN with all its files and dependencies. + +The official installation path is from a checked out version of the +main Guix package tree and that of the Genenetwork package +tree. Current supported versions can be found as the SHA values of +'gn-latest' branches of [[https://github.com/genenetwork/guix-bioinformatics/tree/gn-latest][Guix bioinformatics]] and [[https://github.com/genenetwork/guix/tree/gn-latest][GNU Guix main]]. + +* Source deployment +** Install guix -* Binary deployment +Deploying from source is also straightforward. Install GNU Guix using +a binary tar ball as described [[https://github.com/pjotrp/guix-notes][here]]. -NYA +If it works you should be able to install a package with -* From source deployment +: guix package -i hello -GNU Guix allows for [[https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org][reproducible deployment]] based on a checked out -Guix repository - use gn-stable for that: +** Checkout the git repositories -#+begin_src sh :lang bash +Check out the two relevant guix and guix-bioinformatics git +repositories: + +#+begin_src bash +cd ~ mkdir genenetwork cd genenetwork -git checkout https://github.com/genenetwork/guix.git gn-stable-guix -git checkout https://github.com/genenetwork/guix-bioinformatics.git +git clone --branch gn-latest https://github.com/genenetwork/guix-bioinformatics +git clone --branch gn-latest --recursive https://github.com/genenetwork/guix guix-gn-latest +cd guix-gn-latest +#+end_src bash + +** Update guix + +At some point you may decide to create, install and run a recent +version of the guix-daemon by compiling the guix repository. Follow +[[https://github.com/pjotrp/guix-notes/blob/master/INSTALL.org#building-gnu-guix-from-source-using-guix][these]] steps carefully. + +** Install GN2 + +#+begin_src bash +env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env \ + guix package -i genenetwork2 --fallback +#+end_src bash + +Note that you can use the genenetwork.org guix substitute caching +server at http://guix.genenetwork.org:8080 (which speeds up installs +significantly because all packages are pre-built). Here an IRC session +where we installed GN2 from scratch using GNU Guix and a download +of the test database: + +#+begin_src +<pjotrp> time to get binary install sorted :) [07:03] +<pjotrp> Guix is designed for distributed installation servers +<pjotrp> we have one on guix.genenetwork.org +<pjotrp> it contains all the prebuild packages +<pjotrp> for GN +<user01> okay [07:04] +<pjotrp> let's step back however [07:05] +<pjotrp> I presume the environment is set with all guix package --search-paths +<pjotrp> right? +<user01> yep +<user01> set to the ones in ~/.guix-profile/ +<pjotrp> good, and you are in gn-latest-guix repo [07:06] +<user01> yep [07:07] +<pjotrp> git log shows + +Author: David Thompson <dthompson2@worcester.edu> +Date: Sun Mar 27 21:20:19 2016 -0400 + +<user01> yes +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix + package -A genenetwork2 [07:08] +<pjotrp> shows + +genenetwork2 2.0-a8fcff4 out ../guix-bioinformatics/gn/packages/genenetwork.scm:144:2 +genenetwork2-database-small 1.0 out ../guix-bioinformatics/gn/packages/genenetwork.scm:270:4 +genenetwork2-files-small 1.0 out ../guix-bioinformatics/gn/packages/genenetwork.scm:228:4 + +<user01> yeah [07:09] +<pjotrp> OK, we are in sync. This means we should be able to install the exact + same software +<pjotrp> I need to start up my guix daemon - I usually run it in a screen +<pjotrp> screen -S guix-daemon +<user01> hah, I don't have screen installed yet [07:11] +<pjotrp> comes with guix ;) [07:12] +<pjotrp> no worries, you can run it any way you want +<pjotrp> $HOME/.guix-profile/bin/guix-daemon --build-users-group=guixbuild +<user01> then something's weird, because it says I don't have it +<pjotrp> oh, you need to install it first [07:13] +<pjotrp> guix package -A screen +<pjotrp> screen 4.3.1 out gnu/packages/screen.scm:34:2 +<pjotrp> but you can skip this install, for now +<user01> alright [07:14] +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix + package -i genenetwork2 --dry-run +<pjotrp> substitute: updating list of substitutes from + 'https://mirror.hydra.gnu.org'... 79.1% +<pjotrp> you see that? +<pjotrp> followed by [07:15] +substitute: updating list of substitutes from +'https://hydra.gnu.org'... 100.0% +The following derivations would be built: + /gnu/store/rk7nw0rjqqsha958m649wrykadx6mmhl-profile.drv + +/gnu/store/7b0qjybvfx8syzvfs7p5rdablwhbkbvs-module-import-compiled.drv + /gnu/store/cy9zahbbf23d3cqyy404lk9f50z192kp-module-import.drv + /gnu/store/ibdn603i8grf0jziy5gjsly34wx82lmk-gtk-icon-themes.drv + +<pjotrp> which should have the same HASH values /gnu/store/7b0qjybvf... etc. + [07:16] +<user01> profile has a different hash +<pjotrp> but the next ones? +<user01> they're the same +<pjotrp> not sure why profile differs. Do you see the contact with + mirror.hydra.org? [07:17] +<user01> yeah +<pjotrp> OK, that means you set the key correctly for that one :) +<pjotrp> alright we are at the same state now. You can see most packages need + to be rebuild because they are no longer cached as binaries on hydra + [07:18] +<pjotrp> things move fast... +<user01> hehe +<pjotrp> let me also do the same on my laptop - which I have staged before + [07:19] +<pjotrp> btw, to set the path I often do [07:20] +<pjotrp> export + PATH="/home/wrk/.guix-profile/bin:/home/wrk/.guix-profile/sbin":$PATH +<pjotrp> to keep things like 'screen' from Debian +<pjotrp> Once past building guix itself that is normally OK [07:21] +<user01> ah, okay +<user01> will do that +<pjotrp> the guix build requires certain versions of tools, so you don't want + to mix foreign tools in [07:23] +<user01> makes sense [07:24] +<pjotrp> On my laptop I am trying the main updating list of substitutes from + 'http://hydra.gnu.org'... 10.5% [07:27] +<pjotrp> it is a bit slow, but let's see if there is a difference with the + mirror +<pjotrp> you can see there are two servers here. Actually with recent daemons, + if the mirror fails it will try the main server [07:28] +<pjotrp> I documented the use of a caching server here [07:29] +<pjotrp> https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org +<pjotrp> this is exactly what we are doing now +<user01> alrighty [07:35] +<pjotrp> To see if a remote server has a guix server running it should respond + [07:36] +<pjotrp> lynx http://guix.genenetwork.org:8080 --dump +<pjotrp> Resource not found: / +<pjotrp> +<pjotrp> you see that? +<user01> yes [07:37] +<pjotrp> good. The main hydra server is too slow. So on my laptop I forced + using the mirror with [07:38] +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env guix + package -i genenetwork2 --dry-run + --substitute-urls="http://mirror.hydra.gnu.org" +<pjotrp> +<pjotrp> the list looks the same to me [07:40] +<user01> me too +<pjotrp> note that some packages will be built and some downloaded, right? + [07:41] +<user01> yes +<pjotrp> atlas is actually a binary on my system [07:43] +<pjotrp> I mean in that list +<pjotrp> so, it should not build. Same as yours? +<user01> yeah, atlas and r-gtable are the ones to be downloaded +<pjotrp> You should not have seen that error ;) +<pjotrp> we should try and install it this way, try [07:44] +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix + package -i genenetwork2 --cores=4 --max-jobs=4 --keep-going [07:46] +<pjotrp> set CPUs and max-jobs to something sensible +<pjotrp> Does your VM have multiple cores? +<pjotrp> note you can always press Ctrl-C during install +<user01> it doesn't, I'll reboot it and give it another core [07:47] +<user02> Hey [07:48] +<user02> I'm here +<user02> Will be stepping away for some breakfast +<pjotrp> Can you do the same as us +<pjotrp> Can you see the irc log +<user02> Alright +<user02> Yes, I can +<user02> Please email me a copy in five minutes +<pjotrp> user01: so when I use the GN server [07:56] +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix + package -i genenetwork2 --dry-run + --substitute-urls=http://guix.genenetwork.org:8080 +<pjotrp> I don't need to build anything [07:57] +<pjotrp> (this won't work for you, yet) +<pjotrp> to get it to work you need to 'trust' it [07:58] +<pjotrp> but, first get the build going +<pjotrp> I'll have a coffee while you and get building +<user01> yeah it's doing its thing now [08:01] +<pjotrp> cool [08:02] +<pjotrp> in a separate terminal you can try and install with the gn mirror + [08:05] +<pjotrp> I'll send you the public key and you can paste it as said + https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org + [08:06] +<user01> alright +<pjotrp> should be in the E-mail [08:09] +<pjotrp> getting it working it kinda nasty since the server gives no feedback +<pjotrp> it works when you see no more in the build list ;) [08:11] +<pjotrp> btw, you can install software in parallel. Guix does that. +<pjotrp> even the same packages +<pjotrp> so keep building ;) +<pjotrp> try and do this with Debian... +<pjotrp> coffee for me [08:12] +<user01> the first build failed [08:15] +<pjotrp> OK, Dennis fixed that one yesterday [08:27] +<pjotrp> the problem is that sometime source tarballs disappear [08:28] +<pjotrp> R is notorious for that +<user01> haha, that's inconvenient.. +<pjotrp> well, it is good that Guix catches them +<pjotrp> but we do not cache sources +<pjotrp> binaries are cached - to some degree - so we don't have to rebuild + those [08:29] +<pjotrp> time to use the guix cache at guix.genenetwork.org +<pjotrp> try and install the key (it is in the E-mail) +<pjotrp> and see what this lists [08:31] +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix + package -i genenetwork2 + --substitute-urls=http://guix.genenetwork.org:8080 --dry-run +<pjotrp> should be all binary installs +<user01> it's not.. [08:32] +<user01> if I remove --substitute-urls, the list changes, does that mean I + have the key set up correctly at least? [08:33] +<pjotrp> dunno [08:35] +<pjotrp> how many packages does it want to build? +<pjotrp> should be zero +<user01> four +<pjotrp> Ah, that is OK - those are default profile things +<user01> genenetwork2 is among the ones to be downloaded so [08:36] +<pjotrp> remove --dry-run +<pjotrp> yeah, good sign :) +<pjotrp> we'll still hit a snag, but run it +<pjotrp> should be fast +<user01> doing it [08:37] +<user01> it worked! [08:38] +<user01> I think [08:39] +<pjotrp> heh [08:40] +<pjotrp> you mean it is finished? +<user01> yep +<pjotrp> type genenetwork2 +<user01> complains about not being able to connect to the database [08:41] +<pjotrp> last snag :) +<pjotrp> no database +<pjotrp> well, we succeeded in installing a same-byte install of a very + complex system :) [08:42] +<pjotrp> (always take time to congratulate yourself) +<pjotrp> now we need to install mysql +<user01> hehe :) +<pjotrp> this can be done throug guix or through debian [08:43] +<pjotrp> the latter is a bit easier here, so let's do that +<pjotrp> fun note: you can mix debian and guix +<pjotrp> Follow instructions on [08:44] +<pjotrp> + https://github.com/genenetwork/genenetwork2/tree/staging/doc#run-mysql-server +<pjotrp> apt-get install mysql-common [08:45] +<pjotrp> may do it +<pjotrp> You can also install with guix, but I need to document that +<pjotrp> btw your internet must be fast :) [08:46] +<user01> hehe it is ;) +<pjotrp> when the database is installed [08:48] +<pjotrp> be sure to set the password as instructed [08:50] +<pjotrp> when mysql is set the genenetwork2 command should fire up the web + server on localhost:5003 [08:58] +<pjotrp> btw my internet is way slower :) [09:00] +<user02> I'm back [09:04] +<user02> fixed router firmware upgrade problem +<user02> unbricking +<pjotrp> tssk [09:07] +<user02> I'll never leave routers to update themselves again [09:08] +<user02> self-brick highway +<user02> Resuming [09:09] +<pjotrp> auto-updates are evil +<pjotrp> always switch them off +<pjotrp> user02: can you install genenetwork like user has done? [09:10] +<pjotrp> pretty well documented here now :) +<user02> Yes I can [09:11] +<user02> Already installed key +<pjotrp> user02: you are getting binary packages only now? [09:13] +<user02> That's the sanest way to go now +<user02> seriously +<pjotrp> everything should be pre-built from guix.genenetwork.org +<pjotrp> you are downloading? +<user02> yes [09:15] +<pjotrp> cool. Maybe an idea to set up a server +<pjotrp> for your own use +<user02> Stuck at downloading preprocesscore +<pjotrp> should not [09:24] +<pjotrp> what does env GUIX_PACKAGE_PATH=../guix-bioinformatics/ + ./pre-inst-env guix package -i genenetwork2 + --substitute-urls="http://guix.genenetwork.org:8080" --dry-run + [09:25] +<pjotrp> say for r-prepocesscore +<pjotrp> download or build? +<pjotrp> mine says download [09:26] +<user02> it only lists the derivatives to be built +<user02> nothing else happens [09:27] +<pjotrp> OK, so there is a problem +<pjotrp> your key may not be working +<pjotrp> everything should be listed as 'to be download' [09:28] +<user02> Hmm +<user02> Ah +<user02> I know where I messed up +<pjotrp> where? +<user02> I did add the key +<user02> However +<pjotrp> (I am documenting) +<user02> I did not tell guix to trust it +<pjotrp> yes +<pjotrp> and there is another potential problem +<user02> Remember the documentation on installing guix? +<user02> You have to tell guix to trust the default key [09:29] +<user02> Right? +<user02> So in this case +<pjotrp> read the IRC log +<user02> That step is mandatory +<pjotrp> user01: how are you doing? +<pjotrp> user02: + https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org#using-gnu-guix-archive + [09:30] +<user01> a little bit left on the db download +<pjotrp> user02: you should see no more building +<pjotrp> user02: another issue may be that you updated r-preprocesscore + package in guix-buinformatics [09:32] +<pjotrp> all downstream packages will want to rebuild +<user02> no, not really +<user02> It's not even installed +<pjotrp> checkout a branch of the the old version - make sure we are in synch +<pjotrp> should be at + /gnu/store/y1f3r2xs3fhyadd46nd2aqbr2p9qv2ra-r-biocpreprocesscore-1.32.0 + [09:33] +<pjotrp> +<user03> pjotrp: Possibly we should use the archive utility of Guix to do + deployment to avoid such out-of-sync differences :) [09:34] +<pjotrp> maybe. I did not get archive to update profiles properly [09:37] +<pjotrp> Also it is good that they get to understand guix + this way +<pjotrp> carved in stone, eh [09:38] +<user02> Yeah, all good [09:39] +<user02> My mistake was skipping the guix archive part +<user02> Can we begin with the install? +<user02> It's telling me of derivatives that will be downloaded [09:40] +<user02> So we're good +<user02> Here goes +<pjotrp> yeeha [09:42] +<user02> pjotrp, where is this guix.genenetwork.org located at? +<pjotrp> Tennessee +<user02> It's...it's....sloooooooowwwwwwwwwwwwww +<pjotrp> not from Europe +<pjotrp> is it downloading at all? +<user02> It should be extended +<user02> Yes...like at 100KB/s [09:43] +<user02> tear-jerker +<user02> Verizon problems +<user02> who's the host? +<pjotrp> I am getting 500Kb/s +<pjotrp> UT +<user02> Guix's servers can run off more than one server, right? +<user02> I'd like to host that particular server here +<user02> For speed +<pjotrp> yes +<user02> Sooner or later +<user02> It will be a necessity [09:45] +<pjotrp> exactly what I am doing - this is our server +<pjotrp> guix.genenetwork.org:8080 +<user02> All done installing [09:46] +<pjotrp> what? +<user02> Now the databases +<pjotrp> what do you mean by slow exactly? +<user02> Yes, it's installed +<pjotrp> can you run genenetwork2 +<user02> setting variables +<user02> If I try running it now, it will fail as I don't have the DBs [09:47] +<pjotrp> cool - you had a lot of prebuilt packages already +<pjotrp> OK, follow the instructions I wrote above +<user01> now everything seems to be working for me :) +<user02> OK +<pjotrp> user01: excellent! +<pjotrp> you see a webserver? +<user01> yep, can connect to localhost:5003 [09:48] +<pjotrp> So now you are running a guix copy of GN2 +<pjotrp> you can see where it lives with `which genenetwork2` or ls -l + ~/.guix-profile/bin/genenetwork2 [09:49] +<pjotrp> + /gnu/store/1kma5xszvzsvmbb4k699h7gvdncw901i-genenetwork2-2.0-a8fcff4/bin/genenetwork2 +<pjotrp> it is a script +<pjotrp> written by guix, open it [09:50] +<pjotrp> inside it points to paths and our script at +<pjotrp> + /gnu/store/1kma5xszvzsvmbb4k699h7gvdncw901i-genenetwork2-2.0-a8fcff4/bin/.genenetwork2-real +<pjotrp> if you open that you can see how the webserver is started [09:51] +<pjotrp> next step is to run a recent version of GN2 +<user01> okay [09:52] +<pjotrp> See + https://github.com/genenetwork/genenetwork2/tree/staging/doc#run-your-own-copy-of-gn2 +<pjotrp> but do not checkout that genetwork2_diet +<pjotrp> we reverted to the main tree +<pjotrp> clone git@github.com:genenetwork/genenetwork2.git [09:53] +<pjotrp> instead and checkout the staging branch +<pjotrp> that is effectively my branch [09:54] +<pjotrp> when that is done you should be able to fire up the webserver from + there [09:55] +<pjotrp> using ./bin/genenetwork2 +<user02> now installing DBs +<user02> Downloading +<pjotrp> annoyingly the source tree is ~700Mb [09:56] +<user02> Can it also be done by installing the guix package + genenetwork2-database-small? +<pjotrp> I changed it in the diet version to 8Mb, but I had to revert +<user01> I need to make my VM bigger... +<pjotrp> user02: not ready [09:57] +<user02> ok +<pjotrp> user01: sorry +<pjotrp> user01: you could mount a local dir inside the VM for development +<pjotrp> that would allow you to use MAC tools for editing +<pjotrp> just an idea +<user01> yeah, I figure I'll do something like that +<pjotrp> do you use emacs? [09:58] +<user01> yep +<pjotrp> that can also run on remote files over ssh +<pjotrp> that's an alternative +<pjotrp> kudos for using emacs :), wdyt user03 +<user02> 79 minutes to go downloading the db +<pjotrp> user02: sorry about that [09:59] +<pjotrp> it is 2GB +<user02> user, you can also mount the directory via sshfs +<user02> Mac OSX runs OpenSSH +<pjotrp> user02: sopa +<user02> You can therefore mount a directory outside the VM to the VM via + sshfs [10:00] +<pjotrp> yes, 3 options now +<user02> That way, you can set up a VM only for it's logic +<user02> Apps + the OS it runs [10:01] +<user02> For data, let it reside on physical host accessible via sshfs +<user02> Use this Arch wiki reference: + https://wiki.archlinux.org/index.php/SSHFS +<user02> I edited that last somewhere in 2015, may have been updated since + then +<user01> alright, cool! [10:04] +<pjotrp> user01: you are almost done [10:06] +<pjotrp> I wrote an elixir package for guix :) +<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env guix + package -A elixir + --substitute-urls="http://guix.genenetwork.org:8080" [10:08] +<pjotrp> elixir 1.2.3 out + ../guix-bioinformatics/gn/packages/elixir.scm:31:2 +<pjotrp> +<pjotrp> I am building it on guix.genenetwork.org right now [10:09] +<user01> nice [10:10] #+end_src + +** Run GN2 + +Make a note of the paths with + +#+begin_src bash +./pre-inst-env guix package --search-paths +#+end_src bash + +After setting the paths for the server + +#+begin_src bash +export PATH=~/.guix-profile/bin:$PATH +export PYTHONPATH="$HOME/.guix-profile/lib/python2.7/site-packages" +export R_LIBS_SITE="$HOME/.guix-profile/site-library/" +export GUIX_GTK3_PATH="$HOME/.guix-profile/lib/gtk-3.0" +export GI_TYPELIB_PATH="$HOME/.guix-profile/lib/girepository-1.0" +export XDG_DATA_DIRS="$HOME/.guix-profile/share" +export GIO_EXTRA_MODULES="$HOME/.guix-profile/lib/gio/modules" +#+end_src bash + +run the main script (in ~/.guix-profile/bin) + +#+begin_src bash +genenetwork2 +#+end_src bash + +will start the default server which listens on port 5003, i.e., +http://localhost:5003/. + +** Run MySQL server + +At this point we require the underlying distribution to install +and run mysqld. + +Download one of + +http://files.genenetwork.org/raw_database/ +https://s3.amazonaws.com/genenetwork2/db_webqtl_s.zip + +Check the md5sum. + +After installation inflate the database binary in the MySQL directory +(this is subject to change soon) + +: chown -R mysql:mysql db_webqtl_s/ +: chmod 700 db_webqtl_s/ +: chmod 660 db_webqtl_s/* + +restart MySQL service (mysqld). Login as root and + +: mysql> show databases; +: +--------------------+ +: | Database | +: +--------------------+ +: | information_schema | +: | db_webqtl_s | +: | mysql | +: | performance_schema | +: +--------------------+ + +Set permissions and match password in your settings file below: + +: mysql> grant all privileges on db_webqtl_s.* to gn2@"localhost" identified by 'mysql_password'; + +Note that if the mysql connection is not working, try connecting to +the IP address and check server firewall, hosts.allow and mysql IP +configuration. + +** Run your own copy of GN2 + +At some point you may want to fix the source code. Assuming you have +Guix and Genenetwork2 installed (as described above) clone the GN2 +repository from https://github.com/genenetwork/genenetwork2_diet + +Copy-paste the paths into your terminal (mainly so PYTHON_PATH and +R_LIBS_SITE are set) from the information given by guix: + +: guix package --search-paths + +Inside the repository: + +: cd genenetwork2 +: ./bin/genenetwork2 + +Will fire up your local repo http://localhost:5003/ using the +settings in ./etc/default_settings.py. These settings may +not reflect your system. To override settings create your own from a copy of +default_settings.py and pass it into GN2 with + +: ./bin/genenetwork2 $HOME/my_settings.py + +and everything *should* work (note the full path to the settings +file). This way we develop against the exact same dependency graph of +software. + +If something is not working, take a hint from the settings file +that comes in the Guix installation. It sits in something like + +: cat ~/.guix-profile/lib/python2.7/site-packages/genenetwork2-2.0-py2.7.egg/etc/default_settings.py + +** Set up nginx port forwarding + +nginx can be used as a reverse proxy for GN2. For example, we want to +expose GN2 on port 80 while it is running on port 5003. Essentially +the configuration looks like + +#+begin_src js + server { + listen 80; + server_name test-gn2.genenetwork.org; + access_log logs/test-gn2.access.log; + + proxy_connect_timeout 3000; + proxy_send_timeout 3000; + proxy_read_timeout 3000; + send_timeout 3000; + + location / { + proxy_set_header Host $http_host; + proxy_set_header Connection keep-alive; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $server_name; + proxy_pass http://127.0.0.1:5003; + } +} +#+end_src js + +Install the nginx webserver (as root) + +: guix package -i nginx + +The nginx example configuration examples can be found in the Guix +store through + +: ls -l /root/.guix-profile/sbin/nginx +: lrwxrwxrwx 3 root guixbuild 66 Dec 31 1969 /root/.guix-profile/sbin/nginx -> /gnu/store/g0wrcl5z27rmk5b52rldzvk1bzzbnz2l-nginx-1.8.1/sbin/nginx + +Use that path + +: ls /gnu/store/g0wrcl5z27rmk5b52rldzvk1bzzbnz2l-nginx-1.8.1/share/nginx/conf/ +: fastcgi.conf koi-win scgi_params +: fastcgi.conf.default mime.types scgi_params.default +: fastcgi_params mime.types.default uwsgi_params +: fastcgi_params.default nginx.conf uwsgi_params.default +: koi-utf nginx.conf.default win-utf + +And copy any relevant files to /etc/nginx. A configuration file for +GeneNetwork (reverse proxy) port forwarding can be found in the source +repository under ./etc/nginx-genenetwork.conf. Copy this file to /etc +(still as root) +: cp ./etc/nginx-genenetwork.conf /etc/nginx/ + +Make dirs + +: mkdir -p /var/spool/nginx/logs + +Add users + +: adduser nobody ; addgroup nobody + +Run nginx + +: /root/.guix-profile/sbin/nginx -c /etc/nginx/nginx-genenetwork.conf -p /var/spool/nginx + +* Source deployment and other information on reproducibility + +See the document [[GUIX-Reproducible-from-source.org]]. + +* Trouble shooting + +** ImportError: No module named jinja2 + +If you have all the Guix packages installed this error points out that +the environment variables are not set. Copy-paste the paths into your +terminal (mainly so PYTHON_PATH and R_LIBS_SITE are set) from the +information given by guix: + +: guix package --search-paths + +On one system: + +: export PYTHONPATH="$HOME/.guix-profile/lib/python2.7/site-packages" +: export R_LIBS_SITE="$HOME/.guix-profile/site-library/" +: export GEM_PATH="$HOME/.guix-profile/lib/ruby/gems/2.2.0" + +and perhaps a few more. +** ERROR: can not find directory $HOME/gn2_data + +The default settings file looks in your $HOME/gn2_data. Since these +files come with a Guix installation you should take a hint from the +values in the installed version of default_settings.py (see above in +this document). + +** Can't run a module + +In rare cases, development modules are not brought in with Guix +because no source code is available. This can lead to missing modules +on a running server. Please check with the authors when a module +is missing. diff --git a/doc/database.org b/doc/database.org new file mode 100644 index 00000000..e06ac1ff --- /dev/null +++ b/doc/database.org @@ -0,0 +1,710 @@ +- github Document reduction issue + + +* GeneNetwork Database + +** Estimated table sizes + + +select table_name,round(((data_length + index_length) / 1024 / 1024), 2) `Size in MB` from information_schema.TABLES where table_schema = "db_webqtl" order by data_length; + ++-------------------------+------------+ +| table_name | Size in MB | ++-------------------------+------------+ +| ProbeSetData | 59358.80 | +| SnpAll | 15484.67 | +| ProbeData | 22405.44 | +| SnpPattern | 9177.05 | +| ProbeSetSE | 14551.02 | +| QuickSearch | 5972.86 | +| ProbeSetXRef | 4532.89 | +| LCorrRamin3 | 18506.53 | +| ProbeSE | 6263.83 | +| ProbeSet | 2880.21 | +| Probe | 2150.30 | +| GenoData | 3291.91 | +| CeleraINFO_mm6 | 989.80 | +| pubmedsearch | 1032.50 | +| ProbeXRef | 743.38 | +| GeneRIF_BASIC | 448.54 | +| BXDSnpPosition | 224.44 | +| EnsemblProbe | 133.66 | +| EnsemblProbeLocation | 105.49 | +| Genbank | 37.71 | +| TissueProbeSetData | 74.42 | +| AccessLog | 42.38 | +| GeneList | 34.11 | +| Geno | 33.90 | +| MachineAccessLog | 28.34 | +| IndelAll | 22.42 | +| PublishData | 22.54 | +| TissueProbeSetXRef | 14.73 | +| ProbeH2 | 13.26 | +| GenoXRef | 22.83 | +| TempData | 8.35 | +| GeneList_rn3 | 5.54 | +| GORef | 4.97 | +| Phenotype | 6.50 | +| temporary | 3.59 | +| InfoFiles | 3.32 | +| Publication | 3.42 | +| Homologene | 5.69 | +| Datasets | 2.31 | +| GeneList_rn33 | 2.61 | +| PublishSE | 4.71 | +| GeneRIF | 2.18 | +| Vlookup | 1.87 | +| H2 | 2.18 | +| PublishXRef | 2.18 | +| NStrain | 4.80 | +| IndelXRef | 2.91 | +| Strain | 1.07 | +| GeneMap_cuiyan | 0.51 | +| user_collection | 0.30 | +| CaseAttributeXRef | 0.44 | +| StrainXRef | 0.56 | +| GeneIDXRef | 0.77 | +| Docs | 0.17 | +| News | 0.17 | +| ProbeSetFreeze | 0.22 | +| GeneRIFXRef | 0.24 | +| Sample | 0.06 | +| login | 0.06 | +| user | 0.04 | +| TableFieldAnnotation | 0.05 | +| DatasetMapInvestigator | 0.05 | +| User | 0.04 | +| ProbeFreeze | 0.06 | +| TableComments | 0.02 | +| Investigators | 0.02 | +| DBList | 0.03 | +| Tissue | 0.02 | +| GeneChip | 0.01 | +| GeneCategory | 0.01 | +| SampleXRef | 0.01 | +| InbredSet | 0.01 | +| SnpAllele_to_be_deleted | 0.00 | +| Organizations | 0.01 | +| PublishFreeze | 0.00 | +| GenoFreeze | 0.00 | +| Chr_Length | 0.01 | +| SnpSource | 0.00 | +| AvgMethod | 0.00 | +| Species | 0.00 | +| Dataset_mbat | 0.00 | +| TissueProbeFreeze | 0.00 | +| EnsemblChip | 0.00 | +| TissueProbeSetFreeze | 0.01 | +| UserPrivilege | 0.00 | +| CaseAttribute | 0.00 | +| MappingMethod | 0.00 | +| DBType | 0.00 | +| InfoFilesUser_md5 | 0.00 | +| GenoCode | 0.00 | +| DatasetStatus | 0.00 | +| GeneChipEnsemblXRef | 0.00 | +| GenoSE | 0.00 | +| user_openids | 0.00 | +| roles_users | 0.00 | +| role | 0.00 | +| Temp | NULL | ++-------------------------+------------+ +97 rows in set, 1 warning (0.01 sec) + +All *Data tables are large + +** User access + +According to the meta data: + +This table tracks access time and IP addresses. Used for logging in +registered users and tracking cookies. + +# GN1 uses access table and GN2 uses user table (true/false?) + + select * from AccessLog limit 5; ++-------+---------------------+----------------+ +| id | accesstime | ip_address | ++-------+---------------------+----------------+ +| 12174 | 2003-10-28 02:17:41 | 130.120.104.71 | +| 12173 | 2003-10-28 02:16:27 | 130.120.104.71 | +| 3 | 2003-02-22 07:38:33 | 192.117.159.1 | +| 4 | 2003-02-22 07:49:13 | 192.117.159.1 | +| 5 | 2003-02-22 07:51:08 | 192.117.159.1 | ++-------+---------------------+----------------+ + +select * from AccessLog order by accesstime desc limit 5; ++---------+---------------------+---------------+ +| id | accesstime | ip_address | ++---------+---------------------+---------------+ +| 1025735 | 2016-02-08 14:23:29 | 100.43.81.157 | +| 1025734 | 2016-02-08 13:54:28 | 180.76.15.144 | +| 1025733 | 2016-02-08 13:43:37 | 66.249.65.217 | +| 1025732 | 2016-02-08 13:39:50 | 66.249.65.217 | +| 1025731 | 2016-02-08 13:15:46 | 66.249.65.217 | ++---------+---------------------+---------------+ + +Quite a few trait page hits: + +select count(*) from AccessLog; + ++----------+ +| count(*) | ++----------+ +| 1025685 | ++----------+ + +show indexes from AccessLog; ++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | ++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ +| AccessLog | 0 | PRIMARY | 1 | id | A | 1025685 | NULL | NULL | | BTREE | | | ++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ + +This table is being used by both GN1 and GN2 from the trait pages! + +: grep -ir AccessLog *|grep -e "^gn1\|^gn2"|grep \.py|grep -v doc + +gn1/web/webqtl/showTrait/ShowTraitPage.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \ +gn1/web/webqtl/showTrait/ShowTraitPage.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)" ,user_ip) +gn1/web/webqtl/textUI/cmdClass.py: query = """SELECT count(id) FROM AccessLog WHERE ip_address = %s AND UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400""" +gn1/web/webqtl/textUI/cmdClass.py: query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)""" +gn2/wqflask/wqflask/show_trait/show_trait_page.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \ +gn2/wqflask/wqflask/show_trait/show_trait_page.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip) + +When looking at the code in GN1 and GN2 it restricts the daily use of +the trait data page (set to 1,000 - whoever reaches that?). Unlike +mentioned in the schema description, this table does *not* keep track +of cookies. + +From the code it looks like GN2 uses a mixture of Redis and sqlalchemy +to keep track of logged in sessions (see +gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in +model.py. + +In gn2/wqflask/wqflask/templates/collections/view_anonymous.html it +show_trait_page appears to be loaded (need to check). + +** AvgMethod + +Probesetfreeze refers to AvgMethod + +** BXDSnPosition + +Snp table (all snps) + +Mapping in GN1 shows snps when you select a chromosome. + +** CaseAttribute(XRef) + +Metadata + +** CeleralINFO_mm6 + +? + +** Chr_Length + +Default mm9, column for mm8 + +** Dataset_mbat + +Menu for BXD (linkouts) + +** DatasetMapInvestigator + +Arthur? + +** DataSets + +Information/metadata + +** DatasetStatus + +Arthur private/public + +** DBList and DBType + +Hooked in API (URL encoding) + +** Docs + +GN2 only (see menu bar) + +** Ensembl* + +Probe information + +(will be deprecated) + +** Genbank + +Linkout and not important + +** GeneCategory + +Not important. GeneWiki notes function classification. + +Deprecate. + +** GeneChip + +** GeneIDXRef + +Interspecies gene comparison + +** GeneList + +Track info + +** Genlist_rn3(3) + +Rat list + +** GeneMap_cuiyan + +Link outs + +** GeneRIF + +Wiki info (nightly updated from NCBI) + +XRef should be foreign keys + +** Geno + +SNP or marker info + +** GenoCode + +Belongs to someone else + +** GenoData + +Allele info + +** GenoFreeze + +Big menu (Freeze refers to menu) + +** GenoSE + +SE standard err, not used + +** GenoXREF + +Very important. Key links between Geno, GenoData + +** GORef + +GO terms + +** H2 + +Heritability for probeset(?) + +** Homologene + +Homology, not used much + +** InbredSet + +Group in menu + +** Indelall, SnpAll, SnpPattern, SnpSource + +Indel Snp browser (variant browser Gn1) + +** Info* + +Infra system PhP + +Data Info button + +Infosystem users has separate entries + +Also Investigators, User, Organizations, + +** LCorrRamin3 + +Lit. Correlations Prof. Ramin + +** Login + +GN2 login info + +** MachineAccessLog + +Old + +** MappingMethod + +GN1 + +** News + +GN2 + +** NStrain + +pheno publishfreeze (menu) + xref (keys) + xref links to publish (pubmed), phenotype, pubishdata +geno genofreeze + xref (keys) + xref links to publish (pubmed), genotype, genodata +probeset/expr. probesetfreeze + xref (keys) + xref links to publish (pubmed), probeset, probesetdata +probe/expr. probefreeze + xref (keys) + xref links to publish (pubmed), probe, probedata + +Each dataset has 3 values (real value (1), number of samples (2), stderr (3)) + +NStrain = number of phenotype samples + +ProbesetFreeze contains all data, incl. metabolomic. + +** Phenotype + +This table contains names, full descriptions, and short symbols for +traits and phenotype used primarily in the Published Phenotypes +databases. + +Contains 10k rows, March 2016, of which 5000 are for the BXDs). + +| Id | Pre_publication_description | Post_publication_description | Original_description | Units | Pre_publication_abbreviation | Post_publication_abbreviation | Lab_code | Submitter | Owner | Authorized_Users | ++----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+ +| 1 | NULL | Hippocampus weight | Original post publication description: Hippocampus weight | Unknown | NULL | HPCWT | NULL | robwilliams | NULL | robwilliams | +| 2 | NULL | Cerebellum weight | Original post publication description: Cerebellum weight | mg | NULL | CBLWT | NULL | robwilliams | NULL | robwilliams | +| 3 | NULL | Interleukin 1 activity by peritoneal macrophages stimulated with 10 ug/ml lipopolysaccharide [units/100 ug protein] | Original post publication description: Interleukin 1 activity by peritoneal macrophages stimulated with 10 ug/ml lipopolysaccharide [units/100 ug protein] | units/100 ug protein | NULL | IL1Activity | NULL | robwilliams | NULL | robwilliams | +| 4 | NULL | Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg] | Original post publication description: Cerebellum weight [mg] | mg | NULL | CBLWT2 | NULL | robwilliams | NULL | robwilliams | +| 5 | NULL | The coat color of 79 BXD RI strain | Original post publication description: The coat color of 79 BXD RI strain | Unknown | NULL | CoatColor | NULL | robwilliams | NULL | robwilliams | ++----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+ +5 rows in set (0.00 sec) + +** ProbeData + +Table with fine-grained probe level Affymetrix data only. Contains 1 +billion rows March 2016. This table may be deletable since it is only +used by the Probe Table display in GN1. Not used in GN2 +(double-check). + +In comparison the "ProbeSetData" table contains more molecular assay +data, including probe set data, RNA-seq data, proteomic data, and +metabolomic data. 2.5 billion rows March 2016. In comparison, +ProbeData contains data only for Affymetrix probe level data +(e.g. Exon array probes and M430 probes). + +"ProbeData.StrainId" should be "CaseId" or "SampleId". + +"ProbeData" should probably be "AssayData" or something more neutral. + +select * from ProbeData limit 2; ++--------+----------+---------+ +| Id | StrainId | value | ++--------+----------+---------+ +| 503636 | 42 | 11.6906 | +| 503636 | 43 | 11.4205 | ++--------+----------+---------+ +2 rows in set (0.00 sec) + +select count(*) from ProbeData limit 2; ++-----------+ +| count(*) | ++-----------+ +| 976753435 | ++-----------+ +1 row in set (0.00 sec) + +** ProbeSet + +Comment: PLEASE CHANGE TABLE NAME and rework fields carefully. This is +a terrible table but it works well (RWW March 2016). It is used in +combination with the crucial TRAIT DATA and ANALYSIS pages in GN1 and +GN2. It is also used by annotators using the UPDATE INFO AND DATA web +form to correct and update annotation. It is used by Arthur to enter +new annotation files and metadata for arrays, genes, proteins, +metabolites. The main problem with this table is that it is doing too +much work. + +Initially (2003) this table contained only Affymetrix ProbeSet data +for mouse (U74aV2 initially). Many other array platforms for different +species were added. At least four other major categories of molecular +assays have been added since about 2010. + +1. RNA-seq annotation and sequence data for transcripts using ENSEMBL + identifiers or NCBI NM_XXXXX and NR_XXXXX type identifiers + +2. Protein and peptide annotation and sequence data (see BXD Liver + Proteome data, SRM and SWATH type data) with identifiers such as + "abcb10_q9ji39_t311" for SRM data and "LLGNMIVIVLGHHLGKDFTPAAQAA" + for SWATH data where the latter is just the peptide fragment that + has been quantified. Data first entered in 2015 for work by Rudi + Aebersold and colleagues. + +3. Metabolite annotation and metadata (see BXD Liver Metabolome data) + with identifiers that are usually Mass charge ratios such as + "149.0970810_MZ" + +4. Epigenomic and methylome data (e.g. Human CANDLE Methylation data + with identifiers such as "cg24523000") + +It would make good sense to break this table into four or more types +of molecular assay metadata or annotation tables) (AssayRNA_Anno, +AssayProtein_Anno, AssayMetabolite_Anno, AssayEpigenome_Anno, +AssayMetagenome_Anno), since these assays will have many differences +in annotation content compared to RNAs. + +Some complex logic is used to update contents of this table when +annotators modify and correct the information (for example, updating +gene symbols). These features requested by Rob so that annotating one +gene symbol in one species would annotate all gene symbols in the same +species based on common NCBI GeneID number. For example, changing the +gene alias for one ProbeSet.Id will changing the list of aliases in +all instances with the same gene symbol. + +If the ProbeSet.BlatSeq (or is this ProbSetTargetSeq) is identical +between different ProbeSet.Ids then annotation is forced to be the +same even if the symbol or geneID is different. This "feature" was +implemented when we found many probe sets with identical sequence but +different annotations and identifiers. + + +select count(*) from ProbeSet limit 5; ++----------+ +| count(*) | ++----------+ +| 4351030 | ++----------+ + +| Id | ChipId | Name | TargetId | Symbol | description | Chr | Mb | alias | GeneId | GenbankId | SNP | BlatSeq | TargetSeq | UniGeneId | Strand_Probe | Strand_Gene | OMIM | comments | Probe_set_target_region | Probe_set_specificity | Probe_set_BLAT_score | Probe_set_Blat_Mb_start | Probe_set_Blat_Mb_end | Probe_set_strand | Probe_set_Note_by_RW | flag | Symbol_H | description_H | chromosome_H | MB_H | alias_H | GeneId_H | chr_num | name_num | Probe_Target_Description | RefSeq_TranscriptId | Chr_mm8 | Mb_mm8 | Probe_set_Blat_Mb_start_mm8 | Probe_set_Blat_Mb_end_mm8 | HomoloGeneID | Biotype_ENS | ProteinID | ProteinName | Flybase_Id | HMDB_ID | Confidence | ChEBI_ID | ChEMBL_ID | CAS_number | PubChem_ID | ChemSpider_ID | UNII_ID | EC_number | KEGG_ID | Molecular_Weight | Nugowiki_ID | Type | Tissue | PrimaryName | SecondaryNames | PeptideSequence | ++------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+ +| 7282 | 1 | 93288_at | NULL | Arpc2 | actin related protein 2/3 complex, subunit 2 | 1 | 74.310961 | AK008777 | 76709 | AI835883 | 0 | CCGACTTCCTTAAGGTGCTCAACCGGACTGCTTGCTACTGGATAATCGTGAGGGATTCTCCATTTGGGTTCCATTTTGTACGAGTTTGGCAAATAACCTGCAGAAACGAGCTGTGCTTGCAAGGACTTGATAGTTCCTAATCCTTTTCCAAGCTGTTTGCTTTGCAATATGT | ccgacttccttaaggtgctcaaccgtnnnnnnccnannnnccnagaaaaaagaaatgaaaannnnnnnnnnnnnnnnnnnttcatcccgctaactcttgggaactgaggaggaagcgctgtcgaccgaagnntggactgcttgctactggataatcgtnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntgagggattctccatttgggttccattttgtacgagtttggcaaataacctgcagaaacgagctgtgcttgcaaggacttgatagttcctaagaattanaanaaaaaaaanaanttccacttgatcaanttaattcccttttatttttcctccctcantccccttccttttccaagctgtttgctttgcaatatgt | Mm.337038 | + | | 604224 | | NULL | 8.45 | 169 | 74.310961 | 74.31466 | NULL | NULL | 3 | NULL | NULL | NULL | NULL | NULL | NULL | 1 | 93288 | NULL | XM_129773 | 1 | 74.197594 | 74.197594 | 74.201293 | 4187 | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | ++------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+ +2 rows in set (0.00 sec) + + + + +** ProbeSetData + +Probedata - main molecular data. Probesets, metabolome, + +Almost all important molecular assay data is in this table including +probe set data, RNA-seq data, proteomic data, and metabolomic +data. 2.5 billion rows March 2016. In comparison, ProbeData contains +data only for Affymetrix probe level data (e.g. Exon array probes and +M430 probes). + +select count(*) from ProbeSetData limit 5; ++---------------+ +| count(*) | ++---------------+ +| 2,510,566,472 | ++---------------+ + + +select * from ProbeSetData limit 5; ++----+----------+-------+ +| Id | StrainId | value | ++----+----------+-------+ +| 1 | 1 | 5.742 | +| 1 | 2 | 5.006 | +| 1 | 3 | 6.079 | +| 1 | 4 | 6.414 | +| 1 | 5 | 4.885 | ++----+----------+-------+ + +show indexes from ProbeSetData; ++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | ++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ +| ProbeSetData | 0 | DataId | 1 | Id | A | 34868978 | NULL | NULL | | BTREE | | | +| ProbeSetData | 0 | DataId | 2 | StrainId | A | 2510566472 | NULL | NULL | | BTREE | | | ++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ + +select * from Strain limit 5; ++----+----------+----------+-----------+--------+-------+ +| Id | Name | Name2 | SpeciesId | Symbol | Alias | ++----+----------+----------+-----------+--------+-------+ +| 1 | B6D2F1 | B6D2F1 | 1 | NULL | NULL | +| 2 | C57BL/6J | C57BL/6J | 1 | B6J | NULL | +| 3 | DBA/2J | DBA/2J | 1 | D2J | NULL | +| 4 | BXD1 | BXD1 | 1 | NULL | NULL | +| 5 | BXD2 | BXD2 | 1 | NULL | NULL | ++----+----------+----------+-----------+--------+-------+ + +show indexes from Strain; ++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | ++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ +| Strain | 0 | PRIMARY | 1 | Id | A | 14368 | NULL | NULL | | BTREE | | | +| Strain | 0 | Name | 1 | Name | A | 14368 | NULL | NULL | YES | BTREE | | | +| Strain | 0 | Name | 2 | SpeciesId | A | 14368 | NULL | NULL | | BTREE | | | +| Strain | 1 | Symbol | 1 | Symbol | A | 14368 | NULL | NULL | YES | BTREE | | | ++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+ + +A typical query may look like + +SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id + FROM (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef) + left join ProbeSetSE on + (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId) + WHERE + ProbeSetFreeze.name = 'B139_K_1206_M' AND + ProbeSetXRef.ProbeSetId = ProbeSet.Id AND + ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND + ProbeSetXRef.DataId = ProbeSetData.Id AND + ProbeSetData.StrainId = Strain.Id + Order BY Strain.Name + ++-------+-------+-------+----------+ +| Name | value | error | Id | ++-------+-------+-------+----------+ +| SM001 | 38.3 | NULL | 25309550 | +| SM001 | 2.7 | NULL | 25309520 | +| SM001 | 20.3 | NULL | 25309507 | +| SM001 | 125.8 | NULL | 25309511 | +| SM001 | 8.2 | NULL | 25309534 | ++-------+-------+-------+----------+ +5 rows in set (22.28 sec) + +select * from ProbeSetFreeze limit 5; ++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+ +| Id | ProbeFreezeId | AvgID | Name | Name2 | FullName | ShortName | CreateTime | OrderList | public | confidentiality | AuthorisedUsers | DataScale | ++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+ +| 1 | 3 | 1 | Br_U_0803_M | BXDMicroArray_ProbeSet_August03 | UTHSC Brain mRNA U74Av2 (Aug03) MAS5 | Brain U74Av2 08/03 MAS5 | 2003-08-01 | NULL | 0 | 0 | NULL | log2 | +| 2 | 10 | 1 | Br_U_0603_M | BXDMicroArray_ProbeSet_June03 | UTHSC Brain mRNA U74Av2 (Jun03) MAS5 | Brain U74Av2 06/03 MAS5 | 2003-06-01 | NULL | 0 | 0 | NULL | log2 | +| 3 | 8 | 1 | Br_U_0303_M | BXDMicroArray_ProbeSet_March03 | UTHSC Brain mRNA U74Av2 (Mar03) MAS5 | Brain U74Av2 03/03 MAS5 | 2003-03-01 | NULL | 0 | 0 | NULL | log2 | +| 4 | 5 | 1 | Br_U_0503_M | BXDMicroArray_ProbeSet_May03 | UTHSC Brain mRNA U74Av2 (May03) MAS5 | Brain U74Av2 05/03 MAS5 | 2003-05-01 | NULL | 0 | 0 | NULL | log2 | +| 5 | 4 | 1 | HC_U_0303_M | GNFMicroArray_ProbeSet_March03 | GNF Hematopoietic Cells U74Av2 (Mar03) MAS5 | GNF U74Av2 03/03 MAS5 | 2003-03-01 | NULL | 0 | 0 | NULL | log2 | ++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+ + + select * from ProbeSetXRef limit 5; ++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+ +| ProbeSetFreezeId | ProbeSetId | DataId | Locus_old | LRS_old | pValue_old | mean | se | Locus | LRS | pValue | additive | h2 | ++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+ +| 1 | 1 | 1 | 10.095.400 | 13.3971627898894 | 0.163 | 5.48794285714286 | 0.08525787814808819 | rs13480619 | 12.590069931048001 | 0.269 | -0.28515625 | NULL | +| 1 | 2 | 2 | D15Mit189 | 10.042057464356201 | 0.431 | 9.90165714285714 | 0.0374686634976217 | CEL-17_50896182 | 10.5970737900941 | 0.304 | -0.11678333333333299 | NULL | +| 1 | 3 | 3 | D5Mit139 | 5.43678531742749 | 0.993 | 7.83948571428571 | 0.0457583416912569 | rs13478499 | 6.0970532702754 | 0.988 | 0.112957489878542 | NULL | +| 1 | 4 | 4 | D1Mit511 | 9.87815279480766 | 0.483 | 8.315628571428569 | 0.0470396593931327 | rs6154379 | 11.774867551173099 | 0.286 | -0.157113725490196 | NULL | +| 1 | 5 | 5 | D16H21S16 | 10.191723834264499 | 0.528 | 9.19345714285714 | 0.0354801718293322 | rs4199265 | 10.923263374016202 | 0.468 | 0.11476470588235299 | NULL | ++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+ + + +Note that the following unlimited search is very slow: + +select max(value) from ProbeSetData; + ++------------+ +| max(value) | ++------------+ +| 26436006 | ++------------+ +1 row in set (2 min 16.31 sec) + +which is in some form is used in the search page, see [[https://github.com/genenetwork/genenetwork2_diet/blob/master/wqflask/wqflask/do_search.py#L811][the search code]]. + + +*** Improvements? + +Suggestions on the schema page: + +"StrainId" should be "CaseId" or "SampleId". + +"ProbeSetData" should probably be "AssayData" or something more neutral. + +*** Comments + +I think the ProbeSetData table should be generalized to a 'phenotypes' +table with an 'sample_id' column and a 'value' column. + +A new table 'samples' will link each sample against an 'experiment', +an 'individual' and which in turn can link to a 'strain'. + +Experiment is here in a wide sense, GTex can be one - I don't want to +use dataset ;) + +This means a (slight) reordering: + +phenotypes: (id), sample_id, value +samples: experiment_id, individual_id +experiments: name, version +individual: strain_id +strains: species_id +species: ... + +ProbeData is also interesting, because it has the same structure as +ProbeSetData, but only contains microarrays. This tables should be one +(when we clear up the cross-referencing) as they both contain +phenotype values. Both are large tables. + +PublishData is another phenotype table with values only which can be +merged into that same table. + +So we have phenotype data in 3 tables with exactly the same +layout. There is also TissueProbeSet*, but we'll ignore those for +now. I think we should merge these into one and have the sample ref +refer to the type of data (probeset, probe, metabolomics, +whatever). These are all phenotype values and by having them split +into different tables they won't play well when looking for +correlations. + +ProbeSet contains the metadata on the probes and should (eventually) +move into NoSQL. There is plenty redundancy in that table now. + +I know it is going to be a pain to reorganize the database, but if we +want to use it in the long run we are going to have to simplify it. + + + +** Publication and publishdata (all pheno) + +Phenotype pubs + +** QuickSearch + +No longer used + +** role + +empty + +** Sample* + +No longer used + +** Species & Strain (should be sample) + +Menu + +** InbredSet + +Menu + +** TableComments + +Metadata on DB + +** Temp* + +User upload data + +** Tissue + +Menu - 3rd level + +** TissueP* + +Correlation tables + +** User collection + +User selection - retained + +** UserPrivilege + +** Vlookup + diff --git a/doc/new_variable_names.txt b/doc/new_variable_names.txt deleted file mode 100644 index c11c160e..00000000 --- a/doc/new_variable_names.txt +++ /dev/null @@ -1,6 +0,0 @@ -RISet/riset -> group -webqtlDataset.py -> data_set.py -webqtlDataset (class object) -> DataSet -database/db -> dataset/data_set -DataEditingPage -> show_trait.py/show_trait.html -webqtlTrait -> GeneralTrait \ No newline at end of file diff --git a/doc/notes_DA.txt b/doc/notes_DA.txt deleted file mode 100644 index 410e0182..00000000 --- a/doc/notes_DA.txt +++ /dev/null @@ -1,10 +0,0 @@ -Danny's notes about the genenetwork source - -Location of static files: - -Location of HTML templates: wqflask/wqflask/templates/ - -Entry point of the wqflask app: wqflask/wqflask/__init__.py - -Application routes: wqflask/wqflask/views.py - diff --git a/doc/gn_installation_notes.txt b/doc/old/gn_installation_notes.txt index 584080f7..efea0309 100644 --- a/doc/gn_installation_notes.txt +++ b/doc/old/gn_installation_notes.txt @@ -96,7 +96,7 @@ Before installing from requirements.txt, install numpy separately: pip install numpy==1.7.0 (or whatever version we're using) Install from requirements.txt (after activating virtualenv): -pip install -r gene/misc/requirements.txt +pip install -r gene/doc/requirements.txt =========================================== @@ -343,4 +343,4 @@ python runserver.py To do full upgrade (as opposed to apt-get upgrade) sudo aptitude full-upgrade -=========================================== \ No newline at end of file +=========================================== diff --git a/doc/notes.txt b/doc/old/notes.txt index f8ce2759..f8ce2759 100644 --- a/doc/notes.txt +++ b/doc/old/notes.txt diff --git a/doc/requirements.txt b/doc/requirements.txt deleted file mode 100644 index 39ee5652..00000000 --- a/doc/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -BeautifulSoup==3.2.1 -Flask==0.9 -Flask-Login==0.1.3 -Flask-Mail==0.7.6 -Flask-Principal==0.3.4 -Flask-SQLAlchemy==0.16 -Flask-Security==1.6.0 -Flask-WTF==0.8.3 -Jinja2==2.6 -MySQL-python==1.2.4 -PyYAML==3.10 -#Reaper==1.0 -Reindent==0.1.1 -SQLAlchemy==0.8.0 -WTForms==1.0.3 -Werkzeug==0.8.3 -apache-libcloud==0.12.3 -argparse==1.2.1 -blinker==1.2 -cairosvg==1.0.15 -itsdangerous==0.17 -logging-tree==1.2 -logilab-astng==0.24.3 -logilab-common==0.59.1 -#numarray==1.5.2 -numpy==1.7.0 -passlib==1.6.1 -pp==1.6.3 -pylint==0.27.0 -redis==2.7.2 -requests==1.1.0 -scipy==0.11.0 -simplejson==3.0.7 -wsgiref==0.1.2 -yolk==0.4.3 -XlsxWriter==0.7.2 diff --git a/doc/todo.txt b/doc/todo.txt deleted file mode 100644 index 1d781b13..00000000 --- a/doc/todo.txt +++ /dev/null @@ -1,2 +0,0 @@ -- Ask Rob about potentially recoding qtlreaper -- Ask Rob about Probe/cellid traits \ No newline at end of file diff --git a/etc/default_settings.py b/etc/default_settings.py new file mode 100644 index 00000000..0cf40265 --- /dev/null +++ b/etc/default_settings.py @@ -0,0 +1,29 @@ +import os +import sys + +LOGFILE = "/tmp/genenetwork2.log" + +# This is needed because Flask turns key errors into a +# 400 bad request response with no exception/log +TRAP_BAD_REQUEST_ERRORS = True + +DB_URI = "mysql://gn2:mysql_password@localhost/db_webqtl_s" +SQLALCHEMY_DATABASE_URI = 'mysql://gn2:mysql_password@localhost/db_webqtl_s' + +# http://pythonhosted.org/Flask-Security/configuration.html +SECURITY_CONFIRMABLE = True +SECURITY_TRACKABLE = True +SECURITY_REGISTERABLE = True +SECURITY_RECOVERABLE = True +SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org" +SECURITY_POST_LOGIN_VIEW = "/thank_you" +SQLALCHEMY_POOL_RECYCLE = 3600 + +SERVER_PORT = 5003 +SECRET_HMAC_CODE = '\x08\xdf\xfa\x93N\x80\xd9\\H@\\\x9f`\x98d^\xb4a;\xc6OM\x946a\xbc\xfc\x80:*\xebc' + +# Path overrides for Genenetwork +GENENETWORK_FILES = os.environ['HOME']+"/gn2_data" +PYLMM_COMMAND = str.strip(os.popen("which pylmm_redis").read()) +PLINK_COMMAND = str.strip(os.popen("which plink2").read()) +GEMMA_COMMAND = str.strip(os.popen("which gemma").read()) diff --git a/etc/nginx-genenetwork.conf b/etc/nginx-genenetwork.conf new file mode 100644 index 00000000..06fd3c57 --- /dev/null +++ b/etc/nginx-genenetwork.conf @@ -0,0 +1,54 @@ +user nobody; +worker_processes 4; +pid /var/run/nginx.pid; + +error_log /var/log/nginx/error.log; + +events { + worker_connections 16; +} + +http { + include mime.types; + # default_type application/octet-stream; + + # sendfile on; + keepalive_timeout 480; + + server { + listen 80; + server_name localhost; + + location / { + root html; + index index.html index.htm; + } + + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root html; + } + } + + server { + listen 80; + server_name test-gn2.genenetwork.org; + access_log logs/test-gn2.access.log; + + proxy_connect_timeout 3000; + proxy_send_timeout 3000; + proxy_read_timeout 3000; + send_timeout 3000; + + location / { + # proxy_set_header Host $host; + proxy_set_header Host $http_host; + # proxy_redirect off; + proxy_set_header Connection keep-alive; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Host $server_name; + proxy_pass http://127.0.0.1:5003; + } + } +} diff --git a/setup.py b/setup.py index 1b11acc0..a9b71fab 100755 --- a/setup.py +++ b/setup.py @@ -1,8 +1,17 @@ +# Run setup from python - this script is used by the GNU Guix builder. + from setuptools import setup, find_packages setup(name='genenetwork2', version='2.0', + author = "The GeneNetwork Team", + author_email='rwilliams@uthsc.edu', + url = "https://github.com/genenetwork/genenetwork2/blob/master/README.md", + description = 'Website and tools for genetics.', include_package_data=True, - # packages=find_packages() - packages=['wqflask'] - ) + packages=['wqflask','etc'], + scripts=['bin/genenetwork2'], + # package_data = { + # 'etc': ['*.py'] + # } +) diff --git a/test/lib/link_checker.rb b/test/lib/link_checker.rb new file mode 100644 index 00000000..fb201887 --- /dev/null +++ b/test/lib/link_checker.rb @@ -0,0 +1,48 @@ + +class LinkChecker +end + +describe LinkChecker do + before do + @agent = Mechanize.new + @agent.agent.http.ca_file = '/etc/ssl/certs/ca-certificates.crt' + end + + it "Get to front page" do + page = @agent.get($host) + page.links.each do |link| + puts link.href + if link.href !~ /static\/dbdoc\/Hippocampus/ and link.href !~ /glossary.html|sample_r|grits.eecs.utk.edu|correlationAnnotation.html/ + # Fetch link, program will crash with exception if link is broken + linkpage = @agent.get(link.href) + puts "Link to #{link.href} is valid, response code #{linkpage.code}" + end + end + + end + + describe LinkChecker do + it "Get to trait page" do + page = @agent.get($host+'/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P') + p page + + # Get to trait page for 1435395_s_at + # form2 = show_trait_page.forms_with("trait_page")[0] + # form2.fields[30].name.must_equal "variance:C57BL/6J" + # form2.fields[30].value.must_equal "15.287" + + # Test every link on the page to check if it's broken or not + page.links.each do |link| + puts link.href + if link.href !~ /static\/dbdoc\/Hippocampus/ and link.href !~ /glossary.html|sample_r|grits.eecs.utk.edu|correlationAnnotation.html/ + # Fetch link, program will crash with exception if link is broken + linkpage = @agent.get(link.href) + puts "Link to #{link.href} is valid, response code #{linkpage.code}" + end + end + + end + + end + +end diff --git a/test/lib/main_web_functionality.rb b/test/lib/main_web_functionality.rb new file mode 100644 index 00000000..d0a32835 --- /dev/null +++ b/test/lib/main_web_functionality.rb @@ -0,0 +1,49 @@ +# In these tests we navigate from the main page to search + +class MainWebFunctionality +end + +describe MainWebFunctionality do + before do + @agent = Mechanize.new + @agent.agent.http.ca_file = '/etc/ssl/certs/ca-certificates.crt' + end + + describe MainWebFunctionality do + it "Get to trait page" do + page = @agent.get($host) + form = page.forms[1] + form.buttons[0].value.must_equal "Search" + + # http://localhost:5003/search?species=mouse&group=BXD&type=Hippocampus+mRNA&dataset=HC_M2_0606_P&search_terms_or=&search_terms_and=MEAN%3D%2815+16%29+LRS%3D%2823+46%29&FormID=searchResult + form.fields[2].value = "MEAN=(15 16) LRS=(23 46)" + form.fields[3].value = "mouse" + form.fields[4].value = "BXD" + form.fields[5].value = "Hippocampus mRNA" + form.fields[6].value = "HC_M2_0606_P" + search_page = @agent.submit(form, form.buttons.first) + probe_link = search_page.links.find { |l| l.text =~ /1435395_s_at/ } + probe_link.uri.to_s.must_equal "/show_trait?trait_id=1435395_s_at&dataset=HC_M2_0606_P" + show_trait_page = probe_link.click + p show_trait_page + + # Get to trait page for 1435395_s_at + form2 = show_trait_page.forms_with("trait_page")[0] + form2.fields[30].name.must_equal "variance:C57BL/6J" + # form2.fields[30].value.must_equal "15.287" + + # Test every link on the page to check if it's broken or not + show_trait_page.links.each do |link| + puts link.href + if link.href !~ /static\/dbdoc\/Hippocampus/ and link.href !~ /glossary.html|sample_r|grits.eecs.utk.edu|correlationAnnotation.html/ + # Fetch link, program will crash with exception if link is broken + linkpage = @agent.get(link.href) + puts "Link to #{link.href} is valid, response code #{linkpage.code}" + end + end + + end + + end + +end diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py index 379e5906..053b45fc 100644 --- a/wqflask/base/data_set.py +++ b/wqflask/base/data_set.py @@ -44,6 +44,7 @@ from dbFunction import webqtlDatabaseFunction from utility import webqtlUtil from utility.benchmark import Bench from utility import chunks +from utility.tools import locate, locate_ignore_error from maintenance import get_group_samplelists @@ -57,40 +58,26 @@ DS_NAME_MAP = {} def create_dataset(dataset_name, dataset_type = None, get_samplelist = True): if not dataset_type: dataset_type = Dataset_Getter(dataset_name) - #dataset_type = get_dataset_type_from_json(dataset_name) print("dataset_type is:", dataset_type) - #query = """ - # SELECT DBType.Name - # FROM DBList, DBType - # WHERE DBList.Name = '{}' and - # DBType.Id = DBList.DBTypeId - # """.format(escape(dataset_name)) - #dataset_type = g.db.execute(query).fetchone().Name - dataset_ob = DS_NAME_MAP[dataset_type] dataset_class = globals()[dataset_ob] return dataset_class(dataset_name, get_samplelist) - -#def get_dataset_type_from_json(dataset_name): - class Dataset_Types(object): - + def __init__(self): self.datasets = {} file_name = "wqflask/static/new/javascript/dataset_menu_structure.json" with open(file_name, 'r') as fh: data = json.load(fh) - + print("*" * 70) for species in data['datasets']: for group in data['datasets'][species]: for dataset_type in data['datasets'][species][group]: for dataset in data['datasets'][species][group][dataset_type]: - #print("dataset is:", dataset) - short_dataset_name = dataset[1] if dataset_type == "Phenotypes": new_type = "Publish" @@ -99,32 +86,28 @@ class Dataset_Types(object): else: new_type = "ProbeSet" self.datasets[short_dataset_name] = new_type - + def __call__(self, name): return self.datasets[name] - + # Do the intensive work at startup one time only Dataset_Getter = Dataset_Types() -# -#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611")) - - def create_datasets_list(): key = "all_datasets" result = Redis.get(key) - + if result: print("Cache hit!!!") datasets = pickle.loads(result) - + else: datasets = list() with Bench("Creating DataSets object"): type_dict = {'Publish': 'PublishFreeze', 'ProbeSet': 'ProbeSetFreeze', 'Geno': 'GenoFreeze'} - + for dataset_type in type_dict: query = "SELECT Name FROM {}".format(type_dict[dataset_type]) for result in g.db.execute(query).fetchall(): @@ -133,10 +116,10 @@ def create_datasets_list(): #print("type: {}\tname: {}".format(dataset_type, result.Name)) dataset = create_dataset(result.Name, dataset_type) datasets.append(dataset) - + Redis.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL)) Redis.expire(key, 60*60) - + return datasets @@ -157,30 +140,30 @@ def mescape(*items): class Markers(object): """Todo: Build in cacheing so it saves us reading the same file more than once""" def __init__(self, name): - json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json')) + json_data_fh = open(locate(name + '.json','genotype/json')) try: markers = json.load(json_data_fh) except: markers = [] - + for marker in markers: if (marker['chr'] != "X") and (marker['chr'] != "Y"): marker['chr'] = int(marker['chr']) marker['Mb'] = float(marker['Mb']) - + self.markers = markers #print("self.markers:", self.markers) - - + + def add_pvalues(self, p_values): print("length of self.markers:", len(self.markers)) print("length of p_values:", len(p_values)) - + if type(p_values) is list: # THIS IS only needed for the case when we are limiting the number of p-values calculated #if len(self.markers) > len(p_values): # self.markers = self.markers[:len(p_values)] - + for marker, p_value in itertools.izip(self.markers, p_values): if not p_value: continue @@ -213,18 +196,11 @@ class Markers(object): #self.markers.remove(marker) #del self.markers[i] self.markers = filtered_markers - - - #for i, marker in enumerate(self.markers): - # if not 'p_value' in marker: - # #print("self.markers[i]", self.markers[i]) - # del self.markers[i] - # #self.markers.remove(self.markers[i]) class HumanMarkers(Markers): - + def __init__(self, name, specified_markers = []): - marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim')) + marker_data_fh = open(locate('genotype') + '/' + name + '.bim') self.markers = [] for line in marker_data_fh: splat = line.strip().split() @@ -243,39 +219,21 @@ class HumanMarkers(Markers): marker['name'] = splat[1] marker['Mb'] = float(splat[3]) / 1000000 self.markers.append(marker) - + #print("markers is: ", pf(self.markers)) def add_pvalues(self, p_values): - #for marker, p_value in itertools.izip(self.markers, p_values): - # if marker['Mb'] <= 0 and marker['chr'] == 0: - # continue - # marker['p_value'] = p_value - # print("p_value is:", marker['p_value']) - # marker['lod_score'] = -math.log10(marker['p_value']) - # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values - # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61 - - #print("p_values2:", pf(p_values)) super(HumanMarkers, self).add_pvalues(p_values) - - #with Bench("deleting markers"): - # markers = [] - # for marker in self.markers: - # if not marker['Mb'] <= 0 and not marker['chr'] == 0: - # markers.append(marker) - # self.markers = markers - - + class DatasetGroup(object): """ Each group has multiple datasets; each species has multiple groups. - + For example, Mouse has multiple groups (BXD, BXA, etc), and each group has multiple datasets associated with it. - + """ def __init__(self, dataset): """This sets self.group and self.group_id""" @@ -283,14 +241,14 @@ class DatasetGroup(object): self.name, self.id = g.db.execute(dataset.query_for_group).fetchone() if self.name == 'BXD300': self.name = "BXD" - + self.f1list = None self.parlist = None self.get_f1_parent_strains() #print("parents/f1s: {}:{}".format(self.parlist, self.f1list)) - + self.species = webqtlDatabaseFunction.retrieve_species(self.name) - + self.incparentsf1 = False self.allsamples = None self._datasets = None @@ -301,7 +259,7 @@ class DatasetGroup(object): def get_markers(self): #print("self.species is:", self.species) if self.species == "human": - marker_class = HumanMarkers + marker_class = HumanMarkers else: marker_class = Markers @@ -310,12 +268,6 @@ class DatasetGroup(object): def datasets(self): key = "group_dataset_menu:v2:" + self.name print("key is2:", key) - #with Bench("Loading cache"): - # result = Redis.get(key) - #if result: - # self._datasets = pickle.loads(result) - # return self._datasets - dataset_menu = [] print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH) print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH)) @@ -355,7 +307,7 @@ class DatasetGroup(object): dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)])) else: dataset_sub_menu = [item[1:] for item in dataset] - + tissue_already_exists = False tissue_position = None for i, tissue_dict in enumerate(dataset_menu): @@ -383,7 +335,7 @@ class DatasetGroup(object): f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name] except KeyError: f1 = f12 = maternal = paternal = None - + if f1 and f12: self.f1list = [f1, f12] if maternal and paternal: @@ -402,21 +354,17 @@ class DatasetGroup(object): #print(" type: ", type(self.samplelist)) #print(" self.samplelist: ", self.samplelist) else: - #print("Cache not hit") - - from utility.tools import plink_command - PLINK_PATH,PLINK_COMMAND = plink_command() - - geno_file_path = webqtlConfig.GENODIR+self.name+".geno" - plink_file_path = PLINK_PATH+"/"+self.name+".fam" - - if os.path.isfile(plink_file_path): - self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path) - elif os.path.isfile(geno_file_path): - self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path) + print("Cache not hit") + + genotype_fn = locate_ignore_error(self.name+".geno",'genotype') + mapping_fn = locate_ignore_error(self.name+".fam",'mapping') + if mapping_fn: + self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn) + elif genotype_fn: + self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn) else: self.samplelist = None - #print("after get_samplelist") + print("Sample list: ",self.samplelist) Redis.set(key, json.dumps(self.samplelist)) Redis.expire(key, 60*5) @@ -428,30 +376,14 @@ class DatasetGroup(object): def read_genotype_file(self): '''Read genotype from .geno file instead of database''' - #if self.group == 'BXD300': - # self.group = 'BXD' - # - #assert self.group, "self.group needs to be set" - #genotype_1 is Dataset Object without parents and f1 #genotype_2 is Dataset Object with parents and f1 (not for intercross) genotype_1 = reaper.Dataset() # reaper barfs on unicode filenames, so here we ensure it's a string - full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno')) - if os.path.isfile(full_filename): - #print("Reading file: ", full_filename) - genotype_1.read(full_filename) - #print("File read") - else: - try: - full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno')) - #print("Reading file") - genotype_1.read(full_filename) - #print("File read") - except IOError: - print("File doesn't exist!") + full_filename = str(locate(self.name+'.geno','genotype')) + genotype_1.read(full_filename) if genotype_1.type == "group" and self.parlist: genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1) @@ -460,39 +392,15 @@ class DatasetGroup(object): #determine default genotype object if self.incparentsf1 and genotype_1.type != "intercross": - #self.genotype = genotype_2 genotype = genotype_2 else: self.incparentsf1 = 0 - #self.genotype = genotype_1 genotype = genotype_1 - #self.samplelist = list(self.genotype.prgy) self.samplelist = list(genotype.prgy) - - return genotype - -#class DataSets(object): -# """Builds a list of DataSets""" -# -# def __init__(self): -# self.datasets = list() -# - - - #query = """SELECT Name FROM ProbeSetFreeze - # UNION - # SELECT Name From PublishFreeze - # UNION - # SELECT Name From GenoFreeze""" - # - #for result in g.db.execute(query).fetchall(): - # dataset = DataSet(result.Name) - # self.datasets.append(dataset) + return genotype -#ds = DataSets() -#print("[orange] ds:", ds.datasets) class DataSet(object): """ @@ -516,7 +424,7 @@ class DataSet(object): self.check_confidentiality() self.retrieve_other_names() - + self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype if get_samplelist == True: self.group.get_samplelist() @@ -526,32 +434,11 @@ class DataSet(object): def get_desc(self): """Gets overridden later, at least for Temp...used by trait's get_given_name""" return None - - #@staticmethod - #def get_by_trait_id(trait_id): - # """Gets the dataset object given the trait id""" - # - # - # - # name = g.db.execute(""" SELECT - # - # """) - # - # return DataSet(name) # Delete this eventually @property def riset(): Weve_Renamed_This_As_Group - - - #@property - #def group(self): - # if not self._group: - # self.get_group() - # - # return self._group - def retrieve_other_names(self): """ @@ -561,7 +448,7 @@ class DataSet(object): This is not meant to retrieve the data set info if no name at all is passed. """ - + try: if self.type == "ProbeSet": query_args = tuple(escape(x) for x in ( @@ -597,17 +484,17 @@ class DataSet(object): except TypeError: print("Dataset {} is not yet available in GeneNetwork.".format(self.name)) pass - + def get_trait_data(self, sample_list=None): if sample_list: self.samplelist = sample_list else: self.samplelist = self.group.samplelist - + if self.group.parlist != None and self.group.f1list != None: if (self.group.parlist + self.group.f1list) in self.samplelist: self.samplelist += self.group.parlist + self.group.f1list - + query = """ SELECT Strain.Name, Strain.Id FROM Strain, Species WHERE Strain.Name IN {} @@ -624,21 +511,6 @@ class DataSet(object): number_chunks = int(math.ceil(len(sample_ids) / chunk_size)) trait_sample_data = [] for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks): - - #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId - #tempTable = None - #if GeneId and db.type == "ProbeSet": - # if method == "3": - # tempTable = self.getTempLiteratureTable(species=species, - # input_species_geneid=GeneId, - # returnNumber=returnNumber) - # - # if method == "4" or method == "5": - # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol, - # TissueProbeSetFreezeId=tissueProbeSetFreezeId, - # method=method, - # returnNumber=returnNumber) - if self.type == "Publish": dataset_type = "Phenotype" else: @@ -659,7 +531,7 @@ class DataSet(object): left join {}Data as T{} on T{}.Id = {}XRef.DataId and T{}.StrainId={}\n """.format(*mescape(self.type, item, item, self.type, item, item)) - + if self.type == "Publish": query += """ WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId @@ -676,16 +548,16 @@ class DataSet(object): order by {}.Id """.format(*mescape(self.type, self.type, self.type, self.type, self.name, dataset_type, self.type, self.type, dataset_type)) - + #print("trait data query: ", query) - + results = g.db.execute(query).fetchall() #print("query results:", results) trait_sample_data.append(results) trait_count = len(trait_sample_data[0]) self.trait_data = collections.defaultdict(list) - + # put all of the separate data together into a dictionary where the keys are # trait names and values are lists of sample values for trait_counter in range(trait_count): @@ -698,9 +570,9 @@ class PhenotypeDataSet(DataSet): DS_NAME_MAP['Publish'] = 'PhenotypeDataSet' def setup(self): - + #print("IS A PHENOTYPEDATASET") - + # Fields in the database table self.search_fields = ['Phenotype.Post_publication_description', 'Phenotype.Pre_publication_description', @@ -771,26 +643,26 @@ class PhenotypeDataSet(DataSet): def get_trait_info(self, trait_list, species = ''): for this_trait in trait_list: - + if not this_trait.haveinfo: this_trait.retrieve_info(get_qtl_info=True) description = this_trait.post_publication_description - + #If the dataset is confidential and the user has access to confidential #phenotype traits, then display the pre-publication description instead #of the post-publication description if this_trait.confidential: this_trait.description_display = "" continue # for now - + if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait( privilege=self.privilege, userName=self.userName, authorized_users=this_trait.authorized_users): - + description = this_trait.pre_publication_description - + if len(description) > 0: this_trait.description_display = description.strip() else: @@ -835,7 +707,7 @@ class PhenotypeDataSet(DataSet): this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = LRS_score_value = this_trait.lrs this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb)) - + def retrieve_sample_data(self, trait): query = """ SELECT @@ -893,7 +765,7 @@ class GenotypeDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) - + def get_trait_list(self): query = """ select Geno.Name @@ -927,7 +799,7 @@ class GenotypeDataSet(DataSet): this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) ) this_trait.location_value = trait_location_value - + def retrieve_sample_data(self, trait): query = """ SELECT @@ -1019,7 +891,7 @@ class MrnaAssayDataSet(DataSet): def check_confidentiality(self): return geno_mrna_confidentiality(self) - + def get_trait_list_1(self): query = """ select ProbeSet.Name @@ -1028,17 +900,14 @@ class MrnaAssayDataSet(DataSet): and ProbeSetFreezeId = {} """.format(escape(str(self.id))) results = g.db.execute(query).fetchall() - #print("After get_trait_list query") trait_data = {} for trait in results: - #print("Retrieving sample_data for ", trait[0]) trait_data[trait[0]] = self.retrieve_sample_data(trait[0]) - #print("After retrieve_sample_data") return trait_data - + def get_trait_info(self, trait_list=None, species=''): - # Note: setting trait_list to [] is probably not a great idea. + # Note: setting trait_list to [] is probably not a great idea. if not trait_list: trait_list = [] @@ -1101,7 +970,7 @@ class MrnaAssayDataSet(DataSet): #print("query is:", pf(query)) result = g.db.execute(query).fetchone() - + mean = result[0] if result else 0 if mean: @@ -1122,28 +991,15 @@ class MrnaAssayDataSet(DataSet): Geno.SpeciesId = Species.Id """.format(species, this_trait.locus) result = g.db.execute(query).fetchone() - + if result: - #if result[0] and result[1]: - # lrs_chr = result[0] - # lrs_mb = result[1] lrs_chr, lrs_mb = result #XZ: LRS_location_value is used for sorting lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb) - - #try: - # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb) - #except: - # if lrs_chr.upper() == 'X': - # lrs_location_value = 20*1000 + float(lrs_mb) - # else: - # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 + - # float(lrs_mb)) - this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs this_trait.LRS_score_value = this_trait.lrs this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb)) - + def convert_location_to_value(self, chromosome, mb): try: @@ -1154,7 +1010,7 @@ class MrnaAssayDataSet(DataSet): else: location_value = (ord(str(chromosome).upper()[0])*1000 + float(mb)) - + return location_value def get_sequence(self): @@ -1171,7 +1027,7 @@ class MrnaAssayDataSet(DataSet): """ % (escape(self.name), escape(self.dataset.name)) results = g.db.execute(query).fetchone() return results[0] - + def retrieve_sample_data(self, trait): query = """ SELECT @@ -1192,8 +1048,8 @@ class MrnaAssayDataSet(DataSet): results = g.db.execute(query).fetchall() #print("RETRIEVED RESULTS HERE:", results) return results - - + + def retrieve_genes(self, column_name): query = """ select ProbeSet.Name, ProbeSet.%s @@ -1202,37 +1058,8 @@ class MrnaAssayDataSet(DataSet): ProbeSetXRef.ProbeSetId=ProbeSet.Id; """ % (column_name, escape(str(self.id))) results = g.db.execute(query).fetchall() - - return dict(results) - #def retrieve_gene_symbols(self): - # query = """ - # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId - # from ProbeSet,ProbeSetXRef - # where ProbeSetXRef.ProbeSetFreezeId = %s and - # ProbeSetXRef.ProbeSetId=ProbeSet.Id; - # """ % (self.id) - # results = g.db.execute(query).fetchall() - # symbol_dict = {} - # for item in results: - # symbol_dict[item[0]] = item[1] - # return symbol_dict - # - #def retrieve_gene_ids(self): - # query = """ - # select ProbeSet.Name, ProbeSet.GeneId - # from ProbeSet,ProbeSetXRef - # where ProbeSetXRef.ProbeSetFreezeId = %s and - # ProbeSetXRef.ProbeSetId=ProbeSet.Id; - # """ % (self.id) - # return process_and_run_query(query) - # results = g.db.execute(query).fetchall() - # symbol_dict = {} - # for item in results: - # symbol_dict[item[0]] = item[1] - # return symbol_dict - - + return dict(results) class TempDataSet(DataSet): @@ -1254,8 +1081,8 @@ class TempDataSet(DataSet): self.id = 1 self.fullname = 'Temporary Storage' self.shortname = 'Temp' - - + + @staticmethod def handle_pca(desc): if 'PCA' in desc: @@ -1264,13 +1091,13 @@ class TempDataSet(DataSet): else: desc = desc[:desc.index('entered')].strip() return desc - + def get_desc(self): g.db.execute('SELECT description FROM Temp WHERE Name=%s', self.name) desc = g.db.fetchone()[0] desc = self.handle_pca(desc) - return desc - + return desc + def get_group(self): self.cursor.execute(""" SELECT @@ -1283,7 +1110,7 @@ class TempDataSet(DataSet): """, self.name) self.group, self.group_id = self.cursor.fetchone() #return self.group - + def retrieve_sample_data(self, trait): query = """ SELECT @@ -1297,7 +1124,7 @@ class TempDataSet(DataSet): Order BY Strain.Name """ % escape(trait.name) - + results = g.db.execute(query).fetchall() diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py new file mode 100755 index 00000000..0358bcbf --- /dev/null +++ b/wqflask/base/webqtlConfig.py @@ -0,0 +1,79 @@ +#########################################' +# Environment Variables - public +# +# Note: much of this needs to handled by the settings/environment +# scripts. But rather than migrating everything in one go, we'll +# take it a step at a time. First the hard coded paths get replaced +# with those in utility/tools.py +# +######################################### + +from utility.tools import mk_dir, assert_dir, flat_files, TEMPDIR + +#Debug Level +#1 for debug, mod python will reload import each time +DEBUG = 1 + +#USER privilege +USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4} + +#minimum number of informative strains +KMININFORMATIVE = 5 + +#maximum number of traits for interval mapping +MULTIPLEMAPPINGLIMIT = 11 + +#maximum number of traits for correlation +MAXCORR = 100 + +#Daily download limit from one IP +DAILYMAXIMUM = 1000 + +#maximum LRS value +MAXLRS = 460.0 + +#temporary data life span +MAXLIFE = 86400 + +#MINIMUM Database public value +PUBLICTHRESH = 0 + +#NBCI address +NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s" +UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s" +GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s" +OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s" +UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s"; +HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=homologene&Cmd=DetailsSearch&Term=%s" +PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract" +UCSC_POS = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=mammal&org=%s&db=%s&position=chr%s:%s-%s&pix=800&Submit=submit" +UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' +UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s' +UCSC_GENOME = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=http://web2qtl.utmem.edu:88/snp/chr%s" +ENSEMBLE_BLAT = 'http://www.ensembl.org/Mus_musculus/featureview?type=AffyProbe&id=%s' +DBSNP = 'http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s' +UCSC_RUDI_TRACK_URL = " http://genome.cse.ucsc.edu/cgi-bin/hgTracks?org=%s&db=%s&hgt.customText=http://gbic.biol.rug.nl/~ralberts/tracks/%s/%s" +GENOMEBROWSER_URL="http://ucscbrowser.genenetwork.org/cgi-bin/hgTracks?clade=mammal&org=Mouse&db=mm9&position=%s&hgt.suggest=&pix=800&Submit=submit" +ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?species=Mus_musculus;idx=Transcript;end=1;q=%s" + +# The following paths are no longer in use! +# HTMLPATH is replaced by GENODIR +# IMGDIR is replaced by GENERATED_IMAGE_DIR + +# Temporary storage: +TMPDIR = mk_dir(TEMPDIR+'/gn2/') +CACHEDIR = mk_dir(TEMPDIR+'/cache/') +# We can no longer write into the git tree: +GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'/generated/') +GENERATED_TEXT_DIR = mk_dir(TMPDIR+'/generated_text/') + +# Flat file directories +GENODIR = flat_files('genotype')+'/' +JSON_GENODIR = assert_dir(GENODIR+'json/') + +PORTADDR = "http://50.16.251.170" + +INFOPAGEHREF = '/dbdoc/%s.html' +CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR' +SCRIPTFILE = 'main.py' + diff --git a/wqflask/base/webqtlFormData.py b/wqflask/base/webqtlFormData.py index 44fdcc3f..10251756 100755 --- a/wqflask/base/webqtlFormData.py +++ b/wqflask/base/webqtlFormData.py @@ -157,7 +157,7 @@ class webqtlFormData(object): self.genotype_1 = reaper.Dataset() - full_filename = os.path.join(webqtlConfig.GENODIR, self.group + '.geno') + full_filename = locate(self.group + '.geno','genotype') # reaper barfs on unicode filenames, so here we ensure it's a string full_filename = str(full_filename) diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py index 74784853..e748a822 100755 --- a/wqflask/basicStatistics/BasicStatisticsFunctions.py +++ b/wqflask/basicStatistics/BasicStatisticsFunctions.py @@ -118,7 +118,7 @@ def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specia Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel) filename= webqtlUtil.genRandStr("nP_") - c.save(webqtlConfig.IMGDIR+filename, format='gif') + c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') img=HT.Image('/image/'+filename+'.gif',border=0) @@ -145,7 +145,7 @@ def plotBoxPlot(vals): Plot.plotBoxPlot(canvas, XXX, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), XLabel= "Trait") filename= webqtlUtil.genRandStr("Box_") - canvas.save(webqtlConfig.IMGDIR+filename, format='gif') + canvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') img=HT.Image('/image/'+filename+'.gif',border=0) plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13")) @@ -201,7 +201,7 @@ def plotBarGraph(identification='', RISet='', vals=None, type="name"): Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw) filename= webqtlUtil.genRandStr("Bar_") - c.save(webqtlConfig.IMGDIR+filename, format='gif') + c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') img=HT.Image('/image/'+filename+'.gif',border=0) return img diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py index e080050e..489d291f 100755 --- a/wqflask/maintenance/gen_select_dataset.py +++ b/wqflask/maintenance/gen_select_dataset.py @@ -36,8 +36,7 @@ from __future__ import print_function, division #print("cdict is:", cdict) import sys -sys.path.append("/home/zas1024/") -import zach_settings +import zach_settings # no hard code paths! import MySQLdb diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py index b8397b47..a9059fad 100755 --- a/wqflask/maintenance/get_group_samplelists.py +++ b/wqflask/maintenance/get_group_samplelists.py @@ -6,7 +6,6 @@ import gzip from base import webqtlConfig - def process_genofiles(geno_dir=webqtlConfig.GENODIR): print("Yabba") #sys.exit("Dabba") @@ -54,4 +53,4 @@ def get_samplelist_from_plink(genofilename): line = line.split(" ") samplelist.append(line[0]) - return samplelist \ No newline at end of file + return samplelist diff --git a/wqflask/runserver.py b/wqflask/runserver.py index 59ebf0d4..e4392b3f 100755 --- a/wqflask/runserver.py +++ b/wqflask/runserver.py @@ -25,8 +25,8 @@ file_handler = logging.FileHandler(app.config['LOGFILE']) file_handler.setLevel(logging.DEBUG) app.logger.addHandler(file_handler) -import logging_tree -logging_tree.printout() +# import logging_tree +# logging_tree.printout() app.run(host='0.0.0.0', port=app.config['SERVER_PORT'], diff --git a/wqflask/utility/external.py b/wqflask/utility/external.py new file mode 100644 index 00000000..50afea08 --- /dev/null +++ b/wqflask/utility/external.py @@ -0,0 +1,9 @@ +# Call external program + +import os +import sys +import subprocess + +def shell(command): + if subprocess.call(command, shell=True) != 0: + raise Exception("ERROR: failed on "+command) diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py new file mode 100644 index 00000000..67b84dc9 --- /dev/null +++ b/wqflask/utility/genofile_parser.py @@ -0,0 +1,100 @@ +# CTL analysis for GN2 +# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> + +from __future__ import print_function, division, absolute_import +import sys +import os +import glob +import traceback +import gzip + + +import simplejson as json + +from pprint import pformat as pf + +class Marker(object): + def __init__(self): + self.name = None + self.chr = None + self.cM = None + self.Mb = None + self.genotypes = [] + + +class ConvertGenoFile(object): + + def __init__(self, input_file): + self.mb_exists = False + self.cm_exists = False + self.markers = [] + + self.latest_row_pos = None + self.latest_col_pos = None + + self.latest_row_value = None + self.latest_col_value = None + self.input_fh = open(input_file) + print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!") + self.haplotype_notation = { + '@mat': "1", + '@pat': "2", + '@het': "-999", + '@unk': "-999" + } + self.configurations = {} + + def process_rows(self): + for self.latest_row_pos, row in enumerate(self.input_fh): + self.latest_row_value = row + # Take care of headers + if not row.strip(): + continue + if row.startswith('#'): + continue + if row.startswith('Chr'): + if 'Mb' in row.split(): + self.mb_exists = True + if 'cM' in row.split(): + self.cm_exists = True + skip = 2 + self.cm_exists + self.mb_exists + self.individuals = row.split()[skip:] + continue + if row.startswith('@'): + key, _separater, value = row.partition(':') + key = key.strip() + value = value.strip() + if key in self.haplotype_notation: + self.configurations[value] = self.haplotype_notation[key] + continue + if not len(self.configurations): + raise EmptyConfigurations + yield row + + def process_csv(self): + for row_count, row in enumerate(self.process_rows()): + row_items = row.split("\t") + + this_marker = Marker() + this_marker.name = row_items[1] + this_marker.chr = row_items[0] + if self.cm_exists and self.mb_exists: + this_marker.cM = row_items[2] + this_marker.Mb = row_items[3] + genotypes = row_items[4:] + elif self.cm_exists: + this_marker.cM = row_items[2] + genotypes = row_items[3:] + elif self.mb_exists: + this_marker.Mb = row_items[2] + genotypes = row_items[3:] + else: + genotypes = row_items[2:] + for item_count, genotype in enumerate(genotypes): + if genotype.upper().strip() in self.configurations: + this_marker.genotypes.append(self.configurations[genotype.upper().strip()]) + else: + print("WARNING:", genotype.upper()) + this_marker.genotypes.append("NA") + self.markers.append(this_marker.__dict__) + diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py index b8a41f60..dd8c4a1e 100644 --- a/wqflask/utility/tools.py +++ b/wqflask/utility/tools.py @@ -1,84 +1,137 @@ # Tools/paths finder resolves external paths from settings and/or environment # variables -# -# Currently supported: -# -# PYLMM_PATH finds the root of the git repository of the pylmm_gn2 tool import os import sys from wqflask import app -def get_setting(id,default,guess,get_valid_path): - """ - Resolve a setting from the environment or the global settings in app.config +def get_setting(command_id,guess=None): + """Resolve a setting from the environment or the global settings in + app.config, with get_valid_path is a function checking whether the + path points to an expected directory and returns the full path to + the binary command + + guess = os.environ.get('HOME')+'/pylmm' + get_setting('PYLMM_PATH',guess) + + first tries the environment variable in +id+, next gets the Flask + app setting for the same +id+ and finally does an educated + +guess+. + + In all, the environment overrides the others, next is the flask + setting, then the guess. A valid path to the binary command is + returned. If none is resolved an exception is thrown. + + Note that we do not use the system path. This is on purpose + because it will mess up controlled (reproducible) deployment. The + proper way is to either use the GNU Guix defaults as listed in + etc/default_settings.py or override them yourself by creating a + different settings.py file (or setting the environment). + """ + def value(command): + if command: + sys.stderr.write("Found path "+command+"\n") + return command + else: + return None + # ---- Check whether environment exists - path = get_valid_path(os.environ.get(id)) - # ---- Check whether setting exists - setting = app.config.get(id) - if not path: - path = get_valid_path(setting) - # ---- Check whether default exists - if not path: - path = get_valid_path(default) - # ---- Guess directory - if not path: - if not setting: - setting = guess - path = get_valid_path(guess) - if not path: - raise Exception(id+' '+setting+' path unknown or faulty (update settings.py?). '+id+' should point to the root of the git repository') - - return path - -def pylmm_command(default=None): + sys.stderr.write("Looking for "+command_id+"\n") + command = value(os.environ.get(command_id)) + if not command: + # ---- Check whether setting exists in app + command = value(app.config.get(command_id)) + if not command: + command = value(guess) + if not command: + raise Exception(command_id+' path unknown or faulty (update settings.py?). '+command_id+' should point to the path') + return command + +def valid_bin(bin): + if os.path.islink(bin) or valid_file(bin): + return bin + return None + +def valid_file(fn): + if os.path.isfile(fn): + return fn + return None + +def valid_path(dir): + if os.path.isdir(dir): + return dir + return None + +def pylmm_command(guess=None): + return valid_bin(get_setting("PYLMM_COMMAND",guess)) + +def gemma_command(guess=None): + return valid_bin(get_setting("GEMMA_COMMAND",guess)) + +def plink_command(guess=None): + return valid_bin(get_setting("PLINK_COMMAND",guess)) + +def flat_files(subdir=None): + base = get_setting("GENENETWORK_FILES") + if subdir: + return assert_dir(base+"/"+subdir) + return assert_dir(base) + +def assert_dir(dir): + if not valid_path(dir): + raise Exception("ERROR: can not find directory "+dir) + return dir + +def mk_dir(dir): + if not valid_path(dir): + os.makedirs(dir) + return assert_dir(dir) + +def locate(name, subdir=None): """ - Return the path to the repository and the python command to call + Locate a static flat file in the GENENETWORK_FILES environment. + + This function throws an error when the file is not found. """ - def get_valid_path(path): - """Test for a valid repository""" - if path: - sys.stderr.write("Trying PYLMM_PATH in "+path+"\n") - if path and os.path.isfile(path+'/pylmm_gn2/lmm.py'): - return path + base = get_setting("GENENETWORK_FILES") + if subdir: + base = base+"/"+subdir + if valid_path(base): + lookfor = base + "/" + name + if valid_file(lookfor): + print("Found: file "+lookfor+"\n") + return lookfor else: - None + raise Exception("Can not locate "+lookfor) + if subdir: sys.stderr.write(subdir) + raise Exception("Can not locate "+name+" in "+base) - guess = os.environ.get('HOME')+'/pylmm_gn2' - path = get_setting('PYLMM_PATH',default,guess,get_valid_path) - pylmm_command = 'python '+path+'/pylmm_gn2/lmm.py' - return path,pylmm_command - -def plink_command(default=None): +def locate_ignore_error(name, subdir=None): """ - Return the path to the repository and the python command to call + Locate a static flat file in the GENENETWORK_FILES environment. + + This function does not throw an error when the file is not found + but returns None. """ - def get_valid_path(path): - """Test for a valid repository""" - if path: - sys.stderr.write("Trying PLINK_PATH in "+path+"\n") - if path and os.path.isfile(path+'/plink'): - return path - else: - None - - guess = os.environ.get('HOME')+'/plink_gemma' - path = get_setting('PLINK_PATH',default,guess,get_valid_path) - plink_command = path+'/plink' - return path,plink_command - -def gemma_command(default=None): - def get_valid_path(path): - """Test for a valid repository""" - if path: - sys.stderr.write("Trying PLINK_PATH in "+path+"\n") - if path and os.path.isfile(path+'/plink'): - return path - else: - None + base = get_setting("GENENETWORK_FILES") + if subdir: + base = base+"/"+subdir + if valid_path(base): + lookfor = base + "/" + name + if valid_file(lookfor): + print("Found: file "+name+"\n") + return lookfor + sys.stderr.write("WARNING: file "+name+" not found\n") + return None + +def tempdir(): + return valid_path(get_setting("TEMPDIR","/tmp")) - guess = os.environ.get('HOME')+'/plink' - path = get_setting('PLINK_PATH',default,guess,get_valid_path) - gemma_command = path+'/gemma' - return path, gemma_command \ No newline at end of file + +# Cached values +PYLMM_COMMAND = pylmm_command() +GEMMA_COMMAND = gemma_command() +PLINK_COMMAND = plink_command() +FLAT_FILES = flat_files() +TEMPDIR = tempdir() diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py index c4cb0145..6d8dd76a 100644 --- a/wqflask/wqflask/correlation/show_corr_results.py +++ b/wqflask/wqflask/correlation/show_corr_results.py @@ -795,13 +795,11 @@ class CorrelationResults(object): for sample in sample_names: if sample not in excluded_samples: - value = start_vars['value:' + sample] - if value.strip().lower() == 'x': - self.sample_data[str(sample)] = None - else: - self.sample_data[str(sample)] = float(value) - - + # print("Looking for",sample,"in",start_vars) + value = start_vars.get('value:' + sample) + if value: + if not value.strip().lower() == 'x': + self.sample_data[str(sample)] = float(value) ##XZ, 12/16/2008: the input geneid is of mouse type #def checkForLitInfo(self,geneId): @@ -1029,7 +1027,7 @@ class CorrelationResults(object): use_tissue_corr = True DatabaseFileName = self.getFileName( target_db_name=self.target_db_name ) - datasetFile = open(webqtlConfig.TEXTDIR+DatabaseFileName,'r') + datasetFile = open(webqtlConfig.CACHEDIR+DatabaseFileName,'r') #XZ, 01/08/2009: read the first line line = datasetFile.readline() diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py index 6bc0ef77..f74e655d 100755 --- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py +++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py @@ -43,7 +43,6 @@ from pprint import pformat as pf from htmlgen import HTMLgen2 as HT import reaper -from base import webqtlConfig from utility.THCell import THCell from utility.TDCell import TDCell from base.trait import GeneralTrait diff --git a/wqflask/wqflask/ctl/__init__.py b/wqflask/wqflask/ctl/__init__.py new file mode 100755 index 00000000..e69de29b --- /dev/null +++ b/wqflask/wqflask/ctl/__init__.py diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py new file mode 100644 index 00000000..7a42b2f8 --- /dev/null +++ b/wqflask/wqflask/ctl/ctl_analysis.py @@ -0,0 +1,194 @@ +# CTL analysis for GN2 +# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com> +import sys +from numpy import * +import scipy as sp # SciPy +import rpy2.robjects as ro # R Objects +import rpy2.rinterface as ri + +from base.webqtlConfig import GENERATED_IMAGE_DIR +from utility import webqtlUtil # Random number for the image +from utility import genofile_parser # genofile_parser + +import base64 +import array +import csv +import itertools + +from base import data_set +from base import trait as TRAIT + +from utility import helper_functions +from utility.tools import locate + +from rpy2.robjects.packages import importr +utils = importr("utils") + +## Get pointers to some common R functions +r_library = ro.r["library"] # Map the library function +r_options = ro.r["options"] # Map the options function +r_read_csv = ro.r["read.csv"] # Map the read.csv function +r_dim = ro.r["dim"] # Map the dim function +r_c = ro.r["c"] # Map the c function +r_t = ro.r["t"] # Map the t function +r_cat = ro.r["cat"] # Map the cat function +r_paste = ro.r["paste"] # Map the paste function +r_unlist = ro.r["unlist"] # Map the unlist function +r_head = ro.r["head"] # Map the unlist function +r_unique = ro.r["unique"] # Map the unique function +r_length = ro.r["length"] # Map the length function +r_unlist = ro.r["unlist"] # Map the unlist function +r_list = ro.r.list # Map the list function +r_matrix = ro.r.matrix # Map the matrix function +r_seq = ro.r["seq"] # Map the seq function +r_table = ro.r["table"] # Map the table function +r_names = ro.r["names"] # Map the names function +r_sink = ro.r["sink"] # Map the sink function +r_is_NA = ro.r["is.na"] # Map the is.na function +r_file = ro.r["file"] # Map the file function +r_png = ro.r["png"] # Map the png function for plotting +r_dev_off = ro.r["dev.off"] # Map the dev.off function +r_save_image = ro.r["save.image"] # Map the save.image function +r_class = ro.r["class"] # Map the class function +r_save = ro.r["save"] # Map the save function +r_write_table = ro.r["write.table"] # Map the write.table function +r_read_table = ro.r["read.table"] # Map the read.table function +r_as_data_frame = ro.r["as.data.frame"] # Map the write.table function +r_data_frame = ro.r["data.frame"] # Map the write.table function +r_as_numeric = ro.r["as.numeric"] # Map the write.table function + +class CTL(object): + def __init__(self): + print("Initialization of CTL") + #log = r_file("/tmp/genenetwork_ctl.log", open = "wt") + #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file + #r_sink(log, type = "message") + r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive + r_options(stringsAsFactors = False) + print("Initialization of CTL done, package loaded in R session") + self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function + self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function + self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function + self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function + self.r_CTLnetwork = ro.r["CTLnetwork"] # Map the CTLnetwork function + self.r_CTLprofiles = ro.r["CTLprofiles"] # Map the CTLprofiles function + self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function + print("Obtained pointers to CTL functions") + + def run_analysis(self, requestform): + print("Starting CTL analysis on dataset") + self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')] + self.trait_db_list = [x for x in self.trait_db_list if x] + + print("strategy:", requestform.get("strategy")) + strategy = requestform.get("strategy") + + print("nperm:", requestform.get("nperm")) + nperm = int(requestform.get("nperm")) + + print("parametric:", requestform.get("parametric")) + parametric = bool(requestform.get("parametric")) + + print("significance:", requestform.get("significance")) + significance = float(requestform.get("significance")) + + # Get the name of the .geno file belonging to the first phenotype + datasetname = self.trait_db_list[0].split(":")[1] + dataset = data_set.create_dataset(datasetname) + + genofilelocation = locate(dataset.group.name + ".geno", "genotype") + parser = genofile_parser.ConvertGenoFile(genofilelocation) + parser.process_csv() + + # Create a genotype matrix + individuals = parser.individuals + markers = [] + markernames = [] + for marker in parser.markers: + markernames.append(marker["name"]) + markers.append(marker["genotypes"]) + + genotypes = list(itertools.chain(*markers)) + print(len(genotypes) / len(individuals), "==", len(parser.markers)) + + rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True)) + + # Create a phenotype matrix + traits = [] + for trait in self.trait_db_list: + print("retrieving data for", trait) + if trait != "": + ts = trait.split(':') + gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1]) + gt.retrieve_sample_data(individuals) + for ind in individuals: + if ind in gt.data.keys(): + traits.append(gt.data[ind].value) + else: + traits.append("-999") + + rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True)) + + # Use a data frame to store the objects + rPheno = r_data_frame(rPheno) + rGeno = r_data_frame(rGeno) + + # Debug: Print the genotype and phenotype files to disk + #r_write_table(rGeno, "~/outputGN/geno.csv") + #r_write_table(rPheno, "~/outputGN/pheno.csv") + + # Perform the CTL scan + res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, ncores = 6) + + # Get significant interactions + significant = self.r_CTLsignificant(res, significance = significance) + + # Create an image for output + self.results = {} + self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png" + self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1'] + + self.results['ctlresult'] = significant + self.results['requestform'] = requestform # Store the user specified parameters for the output page + + # Create the lineplot + r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png') + self.r_lineplot(res, significance = significance) + r_dev_off() + + n = 2 + for trait in self.trait_db_list: + # Create the QTL like CTL plots + self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png" + self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)] + r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png') + self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait) + r_dev_off() + n = n + 1 + + # Flush any output from R + sys.stdout.flush() + + def loadImage(self, path, name): + print("pre-loading imgage results:", self.results[path]) + imgfile = open(self.results[path], 'rb') + imgdata = imgfile.read() + imgB64 = imgdata.encode("base64") + bytesarray = array.array('B', imgB64) + self.results[name] = bytesarray + + def render_image(self, results): + self.loadImage("imgloc1", "imgdata1") + n = 2 + for trait in self.trait_db_list: + self.loadImage("imgloc" + str(n), "imgdata" + str(n)) + n = n + 1 + + def process_results(self, results): + print("Processing CTL output") + template_vars = {} + template_vars["results"] = self.results + self.render_image(self.results) + sys.stdout.flush() + return(dict(template_vars)) + diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py index 159c5d6c..2f544d44 100755 --- a/wqflask/wqflask/database.py +++ b/wqflask/wqflask/database.py @@ -24,8 +24,9 @@ def init_db(): # you will have to import them first before calling init_db() #import yourapplication.models import wqflask.model - print("Creating all..") + print("database.py: Creating all model metadata..") Base.metadata.create_all(bind=engine) - print("Done creating all...") + print("database.py: Done creating all model metadata...") + print("Point your browser at http://localhost:5003/") init_db() diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py index 40f518f0..2445b37f 100644 --- a/wqflask/wqflask/heatmap/heatmap.py +++ b/wqflask/wqflask/heatmap/heatmap.py @@ -26,13 +26,12 @@ import reaper from base.trait import GeneralTrait from base import data_set from base import species -from base import webqtlConfig -from utility import webqtlUtil # from wqflask.my_pylmm.pyLMM import lmm # from wqflask.my_pylmm.pyLMM import input from utility import helper_functions from utility import Plot, Bunch from utility import temp_data +from utility.tools import PYLMM_COMMAND from MySQLdb import escape_string as escape @@ -214,7 +213,7 @@ class Heatmap(object): #Redis.expire(key, 60*60) #print("before printing command") # - #command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + #command = 'python lmm.py --key {} --species {}'.format(key, # "other") #print("command is:", command) #print("after printing command") @@ -273,7 +272,7 @@ class Heatmap(object): Redis.expire(key, 60*60) print("before printing command") - command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key, + command = PYLMM_COMMAND+' --key {} --species {}'.format(key, "other") print("command is:", command) print("after printing command") diff --git a/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py b/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py index ec9aa29c..f45ec0c4 100755 --- a/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py +++ b/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py @@ -45,40 +45,40 @@ class IntervalAnalystPage(templatePage): #A dictionary that lets us map the html form names "txStart_mm6" -> "Mb Start (mm8)" #the first item is the short name (column headers) and the second item is the long name (dropdown list) # [short name, long name, category] - columnNames = {"GeneSymbol" : ["Gene", "Gene Name", 'gene'], + columnNames = {"GeneSymbol" : ["Gene", "Gene Name", 'gene'], "GeneDescription" : ["Description", "Gene Description", 'species'], - 'GeneNeighborsCount' : ["Neighbors", "Gene Neighbors", 'gene'], - 'GeneNeighborsRange' : ["Neighborhood", "Gene Neighborhood (Mb)", 'gene'], - 'GeneNeighborsDensity' : ["Gene Density", "Gene Density (Neighbors/Mb)", 'gene'], + 'GeneNeighborsCount' : ["Neighbors", "Gene Neighbors", 'gene'], + 'GeneNeighborsRange' : ["Neighborhood", "Gene Neighborhood (Mb)", 'gene'], + 'GeneNeighborsDensity' : ["Gene Density", "Gene Density (Neighbors/Mb)", 'gene'], "ProteinID" : ["Prot ID", "Protein ID", 'protein'], - "Chromosome" : ["Chr", "Chromosome", 'species'], - "TxStart" : ["Start", "Mb Start", 'species'], - "TxEnd" : ["End", "Mb End", 'species'], - "GeneLength" : ["Length", "Kb Length", 'species'], - "cdsStart" : ["CDS Start", "Mb CDS Start", 'species'], + "Chromosome" : ["Chr", "Chromosome", 'species'], + "TxStart" : ["Start", "Mb Start", 'species'], + "TxEnd" : ["End", "Mb End", 'species'], + "GeneLength" : ["Length", "Kb Length", 'species'], + "cdsStart" : ["CDS Start", "Mb CDS Start", 'species'], "cdsEnd" : ["CDS End", "Mb CDS End", 'species'], - "exonCount" : ["Num Exons", "Exon Count", 'species'], - "exonStarts" : ["Exon Starts", "Exon Starts", 'species'], - "exonEnds" : ["Exon Ends", "Exon Ends", 'species'], - "Strand" : ["Strand", "Strand", 'species'], + "exonCount" : ["Num Exons", "Exon Count", 'species'], + "exonStarts" : ["Exon Starts", "Exon Starts", 'species'], + "exonEnds" : ["Exon Ends", "Exon Ends", 'species'], + "Strand" : ["Strand", "Strand", 'species'], "GeneID" : ["Gene ID", "Gene ID", 'species'], - "GenBankID" : ["GenBank", "GenBank ID", 'species'], + "GenBankID" : ["GenBank", "GenBank ID", 'species'], "UnigenID" : ["Unigen", "Unigen ID", 'species'], - "NM_ID" : ["NM ID", "NM ID", 'species'], + "NM_ID" : ["NM ID", "NM ID", 'species'], "kgID" : ["kg ID", "kg ID", 'species'], - "snpCount" : ["SNPs", "SNP Count", 'species'], - "snpDensity" : ["SNP Density", "SNP Density", 'species'], - "lrs" : ["LRS", "Likelihood Ratio Statistic", 'misc'], - "lod" : ["LOD", "Likelihood Odds Ratio", 'misc'], - "pearson" : ["Pearson", "Pearson Product Moment", 'misc'], - "literature" : ["Lit Corr", "Literature Correlation", 'misc'], + "snpCount" : ["SNPs", "SNP Count", 'species'], + "snpDensity" : ["SNP Density", "SNP Density", 'species'], + "lrs" : ["LRS", "Likelihood Ratio Statistic", 'misc'], + "lod" : ["LOD", "Likelihood Odds Ratio", 'misc'], + "pearson" : ["Pearson", "Pearson Product Moment", 'misc'], + "literature" : ["Lit Corr", "Literature Correlation", 'misc'], } ###Species Freeze speciesFreeze = {'mouse':'mm9', 'rat':'rn3', 'human':'hg19'} for key in speciesFreeze.keys(): speciesFreeze[speciesFreeze[key]] = key - + def __init__(self, fd): templatePage.__init__(self, fd) @@ -86,7 +86,7 @@ class IntervalAnalystPage(templatePage): fd.formdata['remote_ip'] = fd.remote_ip if not self.openMysql(): return - + self.species = fd.formdata.getvalue("species", "mouse") try: self.startMb = float(fd.formdata.getvalue("startMb")) @@ -96,7 +96,7 @@ class IntervalAnalystPage(templatePage): self.endMb = float(fd.formdata.getvalue("endMb")) except: self.endMb = self.startMb + 10 - + self.Chr = fd.formdata.getvalue("chromosome", "1") self.xls = fd.formdata.getvalue("xls", "1") try: @@ -107,38 +107,38 @@ class IntervalAnalystPage(templatePage): self.diffColDefault = self.diffCol = [] if self.species != 'mouse': self.diffColDefault = [2, 3]#default is B6 and D2 for other species - + controlFrm, dispFields = self.genControlForm(fd) geneTable, filename = self.genGeneTable(fd, dispFields) - + infoTD = HT.TD(width=400, valign= "top") - infoTD.append(HT.Paragraph("Interval Analyst : Chr %s" % self.Chr, Class="title"), - HT.Strong("Species : "), self.species.title(), HT.BR(), - HT.Strong("Database : "), "UCSC %s" % self.speciesFreeze[self.species], HT.BR(), - HT.Strong("Range : "), "%2.6f Mb - %2.6f Mb" % (self.startMb, self.endMb), HT.BR(), + infoTD.append(HT.Paragraph("Interval Analyst : Chr %s" % self.Chr, Class="title"), + HT.Strong("Species : "), self.species.title(), HT.BR(), + HT.Strong("Database : "), "UCSC %s" % self.speciesFreeze[self.species], HT.BR(), + HT.Strong("Range : "), "%2.6f Mb - %2.6f Mb" % (self.startMb, self.endMb), HT.BR(), ) if filename: infoTD.append(HT.BR(), HT.BR(), HT.Href(text="Download", url = "/tmp/" + filename, Class="normalsize") , " output in MS excel format.") - + mainTable = HT.TableLite(HT.TR(infoTD, HT.TD(controlFrm, Class="doubleBorder", width=400), HT.TD(" ", width="")), cellpadding=10) mainTable.append(HT.TR(HT.TD(geneTable, colspan=3))) self.dict['body'] = HT.TD(mainTable) self.dict['title'] = "Interval Analyst" - + def genGeneTable(self, fd, dispFields): filename = "" if self.xls: #import pyXLWriter as xl filename = "IntAn_Chr%s_%2.6f-%2.6f" % (self.Chr, self.startMb, self.endMb) filename += ".xls" - + # Create a new Excel workbook workbook = xl.Writer(os.path.join(webqtlConfig.TMPDIR, filename)) worksheet = workbook.add_worksheet() titleStyle = workbook.add_format(align = 'left', bold = 0, size=18, border = 1, border_color="gray") headingStyle = workbook.add_format(align = 'center', bold = 1, size=13, fg_color = 0x1E, color="white", border = 1, border_color="gray") - + ##Write title Info worksheet.write([0, 0], "GeneNetwork Interval Analyst Table", titleStyle) worksheet.write([1, 0], "%s%s" % (webqtlConfig.PORTADDR, os.path.join(webqtlConfig.CGIDIR, self._scriptfile))) @@ -148,12 +148,12 @@ class IntervalAnalystPage(templatePage): worksheet.write([4, 0], "Search by : %s" % fd.formdata['remote_ip']) worksheet.write([5, 0], "view region : Chr %s %2.6f - %2.6f Mb" % (self.Chr, self.startMb, self.endMb)) nTitleRow = 7 - + geneTable = HT.TableLite(Class="collap", cellpadding=5) headerRow = HT.TR(HT.TD(" ", Class="fs13 fwb ffl b1 cw cbrb", width="1")) if self.xls: worksheet.write([nTitleRow, 0], "Index", headingStyle) - + for ncol, column in enumerate(dispFields): if len(column) == 1: headerRow.append(HT.TD(self.columnNames[column[0]][0], Class="fs13 fwb ffl b1 cw cbrb", NOWRAP=1,align="Center")) @@ -162,24 +162,24 @@ class IntervalAnalystPage(templatePage): worksheet.write([nTitleRow, ncol+1], colTitle, headingStyle) worksheet.set_column([ncol+1, ncol+1], 2*len(colTitle)) else: - headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(), " (%s)" % self.speciesFreeze[column[1]], + headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(), " (%s)" % self.speciesFreeze[column[1]], Class="fs13 fwb ffl b1 cw cbrb", NOWRAP=1, align="Center")) if self.xls: colTitle = self.columnNames[column[0]][0] + " (%s)" % self.speciesFreeze[column[1]] worksheet.write([nTitleRow, ncol+1], colTitle, headingStyle) worksheet.set_column([ncol+1, ncol+1], 2*len(colTitle)) - #headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(), - # "(%s %s)" % (column[1].title(), self.speciesFreeze[column[1]]), + #headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(), + # "(%s %s)" % (column[1].title(), self.speciesFreeze[column[1]]), # Class="colorBlue", NOWRAP=1, align="Center")) geneTable.append(headerRow) - + geneCol = GeneUtil.loadGenes(self.cursor, self.Chr, self.diffColDefault, self.startMb, self.endMb, species=self.species) for gIndex, theGO in enumerate(geneCol): geneRow = HT.TR(HT.TD(gIndex+1, Class="fs12 fwn b1", align="right")) if self.xls: nTitleRow += 1 worksheet.write([nTitleRow, 0], gIndex + 1) - + for ncol, column in enumerate(dispFields): if len(column) == 1 or column[1]== self.species: keyValue = "" @@ -196,17 +196,17 @@ class IntervalAnalystPage(templatePage): curGO = theGO[subGO] if theGO[subGO].has_key(fieldName): keyValue = theGO[subGO][fieldName] - + if self.xls: worksheet.write([nTitleRow, ncol+1], keyValue) geneRow.append(self.formatTD(keyValue, fieldName, curSpecies, curGO)) - + geneTable.append(geneRow) - + if self.xls: workbook.close() return geneTable, filename - + def formatTD(self, keyValue, fieldName, Species, theGO): if keyValue is None: keyValue = "" @@ -219,7 +219,7 @@ class IntervalAnalystPage(templatePage): keyValue = "" return HT.TD(keyValue, Class="fs12 fwn b1", width=300) elif fieldName in ("GeneSymbol"): - webqtlLink = HT.Href("./%s?cmd=sch&gene=%s&alias=1&species=%s" % (webqtlConfig.SCRIPTFILE, keyValue, Species), + webqtlLink = HT.Href("./%s?cmd=sch&gene=%s&alias=1&species=%s" % (webqtlConfig.SCRIPTFILE, keyValue, Species), HT.Image("/images/webqtl_search.gif", border=0, valign="top"), target="_blank") if theGO['GeneID']: geneSymbolLink = HT.Href(webqtlConfig.NCBI_LOCUSID % theGO['GeneID'], keyValue, Class="normalsize", target="_blank") @@ -236,8 +236,8 @@ class IntervalAnalystPage(templatePage): return HT.TD(keyValue, Class="fs12 fwn b1",align="right") elif fieldName in ("snpCount"): if keyValue: - snpString = HT.Href(url="%s&chr=%s&start=%s&end=%s&geneName=%s&s1=%d&s2=%d" % (os.path.join(webqtlConfig.CGIDIR, 'main.py?FormID=snpBrowser'), - theGO["Chromosome"], theGO["TxStart"], theGO["TxEnd"], theGO["GeneSymbol"], self.diffColDefault[0], self.diffColDefault[1]), + snpString = HT.Href(url="%s&chr=%s&start=%s&end=%s&geneName=%s&s1=%d&s2=%d" % (os.path.join(webqtlConfig.CGIDIR, 'main.py?FormID=snpBrowser'), + theGO["Chromosome"], theGO["TxStart"], theGO["TxEnd"], theGO["GeneSymbol"], self.diffColDefault[0], self.diffColDefault[1]), text=theGO["snpCount"], target="_blank", Class="normalsize") else: snpString = keyValue @@ -252,13 +252,13 @@ class IntervalAnalystPage(templatePage): return HT.TD(keyValue, Class="fs12 fwn b1",NOWRAP=1) else: return HT.TD(keyValue, Class="fs12 fwn b1",NOWRAP=1,align="right") - + def genControlForm(self, fd): ##desc GeneList self.cursor.execute("Desc GeneList") GeneListFields = self.cursor.fetchall() GeneListFields = map(lambda X: X[0], GeneListFields) - + #group columns by category--used for creating the dropdown list of possible columns categories = {} for item in self.columnNames.keys(): @@ -267,7 +267,7 @@ class IntervalAnalystPage(templatePage): categories[category[-1]] = [item ] else: categories[category[-1]] = categories[category[-1]]+[item] - + ##List All Species in the Gene Table speciesDict = {} self.cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \ @@ -292,34 +292,34 @@ class IntervalAnalystPage(templatePage): pass AppliedField.append(item2) categories[specName] = AppliedField - + categoriesOrder += ['misc'] - + ############################################################ ## Create the list of possible columns for the dropdown list ############################################################ allColumnsList = HT.Select(name="allColumns", Class="snpBrowserDropBox") - + for category in categoriesOrder: allFields = categories[category] if allFields: geneOpt = HT.Optgroup(label=category.title()) for item in allFields: if category in self.speciesFreeze.keys(): - geneOpt.append(("%s (%s %s)" % (self.columnNames[item][1], category.title(), self.speciesFreeze[category]), + geneOpt.append(("%s (%s %s)" % (self.columnNames[item][1], category.title(), self.speciesFreeze[category]), "%s__%s" % (item, self.speciesFreeze[category]))) else: geneOpt.append((self.columnNames[item][1], item)) geneOpt.sort() allColumnsList.append(geneOpt) - + ###################################### ## Create the list of selected columns ###################################### - + #cols contains the value of all the selected columns submitCols = cols = fd.formdata.getvalue("columns", "default") - + if cols == "default": if self.species=="mouse": #these are the same columns that are shown on intervalPage.py cols = ['GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'Strand', 'GeneLength', 'GeneID', 'NM_ID', 'snpCount', 'snpDensity'] @@ -331,12 +331,12 @@ class IntervalAnalystPage(templatePage): else: if type(cols)==type(""): cols = [cols] - + colsLst = [] dispFields = [] for column in cols: if submitCols == "default" and column not in ('GeneSymbol') and (column in GeneListFields or column in speciesField): - colsLst.append(("%s (%s %s)" % (self.columnNames[column][1], self.species.title(), self.speciesFreeze[self.species]), + colsLst.append(("%s (%s %s)" % (self.columnNames[column][1], self.species.title(), self.speciesFreeze[self.species]), "%s__%s" % (column, self.speciesFreeze[self.species]))) dispFields.append([column, self.species]) else: @@ -346,17 +346,17 @@ class IntervalAnalystPage(templatePage): dispFields.append([column]) else: thisSpecies = self.speciesFreeze[column2[1]] - colsLst.append(("%s (%s %s)" % (self.columnNames[column2[0]][1], thisSpecies.title(), column2[1]), + colsLst.append(("%s (%s %s)" % (self.columnNames[column2[0]][1], thisSpecies.title(), column2[1]), column)) dispFields.append((column2[0], thisSpecies)) selectedColumnsList = HT.Select(name="columns", Class="snpBrowserSelectBox", multiple="true", data=colsLst, size=6) - + ########################## ## Create the columns form - ########################## + ########################## columnsForm = HT.Form(name="columnsForm", submit=HT.Input(type='hidden'), cgi=os.path.join(webqtlConfig.CGIDIR, self._scriptfile), enctype="multipart/form-data") columnsForm.append(HT.Input(type="hidden", name="fromdatabase", value= fd.formdata.getvalue("fromdatabase", "unknown"))) - columnsForm.append(HT.Input(type="hidden", name="species", value=self.species)) + columnsForm.append(HT.Input(type="hidden", name="species", value=self.species)) if self.diffCol: columnsForm.append(HT.Input(type="hidden", name="s1", value=self.diffCol[0])) columnsForm.append(HT.Input(type="hidden", name="s2", value=self.diffCol[1])) @@ -366,8 +366,8 @@ class IntervalAnalystPage(templatePage): removeButton = HT.Input(type="button", name="remove", value="Remove", Class="button", onClick="removeFromList(this.form.columns.selectedIndex, this.form.columns)") upButton = HT.Input(type="button", name="up", value="Up", Class="button", onClick="swapOptions(this.form.columns.selectedIndex, this.form.columns.selectedIndex-1, this.form.columns)") downButton = HT.Input(type="button", name="down", value="Down", Class="button", onClick="swapOptions(this.form.columns.selectedIndex, this.form.columns.selectedIndex+1, this.form.columns)") - clearButton = HT.Input(type="button", name="clear", value="Clear", Class="button", onClick="deleteAllElements(this.form.columns)") - submitButton = HT.Input(type="submit", value="Refresh", Class="button", onClick="selectAllElements(this.form.columns)") + clearButton = HT.Input(type="button", name="clear", value="Clear", Class="button", onClick="deleteAllElements(this.form.columns)") + submitButton = HT.Input(type="submit", value="Refresh", Class="button", onClick="selectAllElements(this.form.columns)") selectChrBox = HT.Select(name="chromosome") self.cursor.execute(""" @@ -375,11 +375,11 @@ class IntervalAnalystPage(templatePage): Chr_Length.Name, Length from Chr_Length, Species where Chr_Length.SpeciesId = Species.Id AND - Species.Name = '%s' + Species.Name = '%s' Order by Chr_Length.OrderId """ % self.species) - + results = self.cursor.fetchall() for chrInfo in results: selectChrBox.append((chrInfo[0], chrInfo[0])) @@ -401,5 +401,5 @@ class IntervalAnalystPage(templatePage): #columnsForm.append(HT.Input(type="hidden", name="sort", value=diffCol), # HT.Input(type="hidden", name="identification", value=identification), # HT.Input(type="hidden", name="traitInfo", value=traitInfo)) - + return columnsForm, dispFields diff --git a/wqflask/wqflask/marker_regression/MarkerRegressionPage.py b/wqflask/wqflask/marker_regression/MarkerRegressionPage.py index d02d80b3..4c3391e5 100755 --- a/wqflask/wqflask/marker_regression/MarkerRegressionPage.py +++ b/wqflask/wqflask/marker_regression/MarkerRegressionPage.py @@ -72,7 +72,7 @@ class MarkerRegressionPage(templatePage): #automatically generate pheno txt file for PLINK self.genPhenoTxtFileForPlink(phenoFileName=plinkOutputFileName,RISetName=fd.RISet,probesetName=probesetName, valueDict=allTraitValueDict) # os.system full path is required for input and output files; specify missing value is -9999 - plink_command = '%splink/plink --noweb --ped %splink/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %splink/%s.map --pheno %s/%s.txt --pheno-name %s --missing-phenotype -9999 --out %s%s --assoc ' % (webqtlConfig.HTMLPATH, webqtlConfig.HTMLPATH, fd.RISet, webqtlConfig.HTMLPATH, fd.RISet, webqtlConfig.TMPDIR, plinkOutputFileName, probesetName, webqtlConfig.TMPDIR, plinkOutputFileName) + plink_command = '%splink/plink --noweb --ped %splink/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %splink/%s.map --pheno %s/%s.txt --pheno-name %s --missing-phenotype -9999 --out %s%s --assoc ' % (webqtlConfig.GENODIR, webqtlConfig.GENODIR, fd.RISet, webqtlConfig.GENODIR, fd.RISet, webqtlConfig.TMPDIR, plinkOutputFileName, probesetName, webqtlConfig.TMPDIR, plinkOutputFileName) os.system(plink_command) @@ -140,7 +140,7 @@ class MarkerRegressionPage(templatePage): intCanvas = pid.PILCanvas(size=(self.graphWidth,self.graphHeight)) gifmap = self.plotIntMappingForPLINK(fd, intCanvas, startMb = self.startMb, endMb = self.endMb, plinkResultDict=plinkResultDict) - intCanvas.save(os.path.join(webqtlConfig.IMGDIR, filename), format='png') + intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, filename), format='png') intImg=HT.Image('/image/'+filename+'.png', border=0, usemap='#WebQTLImageMap') TD_LR = HT.TR(HT.TD(HT.Blockquote(gifmap,intImg, HT.P()), bgColor='#eeeeee', height = 200)) @@ -249,7 +249,7 @@ class MarkerRegressionPage(templatePage): intCanvas = pid.PILCanvas(size=(self.graphWidth,self.graphHeight)) gifmap = self.plotIntMapping(fd, intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= "") filename= webqtlUtil.genRandStr("Itvl_") - intCanvas.save(os.path.join(webqtlConfig.IMGDIR, filename), format='png') + intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, filename), format='png') intImg=HT.Image('/image/'+filename+'.png', border=0, usemap='#WebQTLImageMap') ################################################################ @@ -458,7 +458,7 @@ class MarkerRegressionPage(templatePage): #plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification) Plot.plotBar(myCanvas, LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test') filename= webqtlUtil.genRandStr("Reg_") - myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif') + myCanvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif') img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test') if fd.suggestive == None: @@ -1597,7 +1597,7 @@ class MarkerRegressionPage(templatePage): # get strain name from ped file in order def getStrainNameFromPedFile(self, RISetName=''): - pedFileopen= open("%splink/%s.ped"%(webqtlConfig.HTMLPATH, RISetName),"r") + pedFileopen= open("%splink/%s.ped"%(webqtlConfig.GENODIR, RISetName),"r") line =pedFileopen.readline() strainNameList=[] diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py index cfcd4783..caea5802 100644 --- a/wqflask/wqflask/marker_regression/gemma_mapping.py +++ b/wqflask/wqflask/marker_regression/gemma_mapping.py @@ -1,9 +1,7 @@ import os from base import webqtlConfig -from utility.tools import gemma_command - -GEMMA_PATH,GEMMA_COMMAND = gemma_command() +from utility.tools import GEMMA_COMMAND def run_gemma(this_dataset, samples, vals): """Generates p-values for each marker using GEMMA""" @@ -12,8 +10,11 @@ def run_gemma(this_dataset, samples, vals): gen_pheno_txt_file(this_dataset, samples, vals) - os.chdir(GEMMA_PATH) + # Don't do this! + # os.chdir("{}gemma".format(webqtlConfig.GENODIR)) + # use GEMMA_RUN in the next one, create a unique temp file + gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/output/%s.cXX.txt -lmm 1 -o %s_output' % (GEMMA_PATH, this_dataset.group.name, GEMMA_PATH, @@ -46,4 +47,4 @@ def parse_gemma_output(this_dataset): p_values.append(float(line.split("\t")[10])) #print("p_values: ", p_values) - return included_markers, p_values \ No newline at end of file + return included_markers, p_values diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py index c0bfd70b..08f422f0 100644 --- a/wqflask/wqflask/marker_regression/marker_regression.py +++ b/wqflask/wqflask/marker_regression/marker_regression.py @@ -30,21 +30,16 @@ from flask import Flask, g from base.trait import GeneralTrait from base import data_set from base import species -from base import webqtlConfig from utility import webqtlUtil from utility import helper_functions from utility import Plot, Bunch from utility import temp_data from utility.benchmark import Bench -from utility.tools import pylmm_command, plink_command, gemma_command from wqflask.marker_regression import gemma_mapping -#from wqflask.marker_regression import qtl_reaper_mapping -#from wqflask.marker_regression import plink_mapping -#from wqflask.marker_regression import rqtl_mapping -PYLMM_PATH,PYLMM_COMMAND = pylmm_command() -PLINK_PATH,PLINK_COMMAND = plink_command() -GEMMA_PATH,GEMMA_COMMAND = gemma_command() +from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, PLINK_COMMAND +from utility.external import shell +from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR class MarkerRegression(object): @@ -304,22 +299,14 @@ class MarkerRegression(object): included_markers, p_values = self.parse_gemma_output() self.dataset.group.get_specified_markers(markers = included_markers) - - #for marker in self.dataset.group.markers.markers: - # if marker['name'] not in included_markers: - # print("marker:", marker) - # self.dataset.group.markers.markers.remove(marker) - # #del self.dataset.group.markers.markers[marker] - self.dataset.group.markers.add_pvalues(p_values) - return self.dataset.group.markers.markers - def parse_gemma_output(self): included_markers = [] p_values = [] - with open("/home/zas1024/gene/web/gemma/output/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file: + # Use a temporary file name here! + with open(webqtlConfig.GENERATED_TEXT_DIR+"/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file: for line in output_file: if line.startswith("chr"): continue @@ -332,37 +319,18 @@ class MarkerRegression(object): def gen_pheno_txt_file(self): """Generates phenotype file for GEMMA""" - - #with open("/home/zas1024/gene/web/gemma/tmp_pheno/{}.txt".format(filename), "w") as outfile: - # for sample, i in enumerate(self.samples): - # print("sample:" + str(i)) - # print("self.vals[i]:" + str(self.vals[sample])) - # outfile.write(str(i) + "\t" + str(self.vals[sample]) + "\n") - - with open("/home/zas1024/gene/web/gemma/{}.fam".format(self.dataset.group.name), "w") as outfile: + with open(webqtlConfig.GENERATED_TEXT_DIR+"{}.fam".format(self.dataset.group.name), "w") as outfile: for i, sample in enumerate(self.samples): outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n") - - #def gen_plink_for_gemma(self, filename): - # - # make_bed = "/home/zas1024/plink/plink --file /home/zas1024/plink/%s --noweb --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --out %s%s --make-bed" % (webqtlConfig.HTMLPATH, - # webqtlConfig.HTMLPATH, - # self.dataset.group.name, - # webqtlConfig.TMPDIR, - # filename, - # webqtlConfig.TMPDIR, - # filename) - # - # def run_rqtl_plink(self): - os.chdir("/home/zas1024/plink") + # os.chdir("") never do this inside a webserver!! output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name)) self.gen_pheno_txt_file_plink(pheno_filename = output_filename) - rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename) + rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) os.system(rqtl_command) @@ -419,10 +387,11 @@ class MarkerRegression(object): calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function read_cross = ro.r["read.cross"] # Map the read.cross function write_cross = ro.r["write.cross"] # Map the write.cross function - GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the GENOtoCSVR function + GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function - genofilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".geno" - crossfilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".cross" + crossname = self.dataset.group.name + genofilelocation = locate(crossname + ".geno", "genotype") + crossfilelocation = TMPDIR + crossname + ".cross" #print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation) @@ -449,7 +418,7 @@ class MarkerRegression(object): #print("Pair scan results:", result_data_frame) self.pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png" - png(file=webqtlConfig.TMPDIR+self.pair_scan_filename) + png(file=TMPDIR+self.pair_scan_filename) plot(result_data_frame) dev_off() @@ -559,8 +528,8 @@ class MarkerRegression(object): self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename) - plink_command = PLINK_COMMAND + ' --noweb --bed %s/%s.bed --bim %s/%s.bim --fam %s/%s.fam --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename) - #print("plink_command:", plink_command) + plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename) + print("plink_command:", plink_command) os.system(plink_command) @@ -581,7 +550,7 @@ class MarkerRegression(object): def gen_pheno_txt_file_plink(self, pheno_filename = ''): ped_sample_list = self.get_samples_from_ped_file() - output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb") + output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb") header = 'FID\tIID\t%s\n' % self.this_trait.name output_file.write(header) @@ -616,7 +585,7 @@ class MarkerRegression(object): def gen_pheno_txt_file_rqtl(self, pheno_filename = ''): ped_sample_list = self.get_samples_from_ped_file() - output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb") + output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb") header = 'FID\tIID\t%s\n' % self.this_trait.name output_file.write(header) @@ -762,7 +731,7 @@ class MarkerRegression(object): threshold_p_value = 0.01 - result_fp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, output_filename), "rb") + result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb") header_line = result_fp.readline()# read header line line = result_fp.readline() @@ -872,9 +841,7 @@ class MarkerRegression(object): Redis.expire(key, 60*60) command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other") - - os.system(command) - + shell(command) json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60) results = json.loads(json_results[1]) @@ -949,12 +916,11 @@ class MarkerRegression(object): Redis.expire(key, 60*60) print("before printing command") - command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, - "other") + command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other") print("command is:", command) print("after printing command") - os.system(command) + shell(command) #t_stats, p_values = lmm.run(key) #lmm.run(key) @@ -991,8 +957,7 @@ class MarkerRegression(object): #def gen_human_results(self, pheno_vector, tempdata): def gen_human_results(self, pheno_vector, key, temp_uuid): - file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name) - print("file_base:", file_base) + file_base = locate(self.dataset.group.name,"mapping") plink_input = input.plink(file_base, type='b') input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz") @@ -1084,7 +1049,11 @@ class MarkerRegression(object): return trimmed_genotype_data def create_snp_iterator_file(group): - plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group) + """ + This function is only called by main below + """ + raise Exception("Paths are undefined here") + plink_file_base = os.path.join(TMPDIR, group) plink_input = input.plink(plink_file_base, type='b') data = dict(plink_input = list(plink_input), @@ -1145,5 +1114,3 @@ def get_markers_from_csv(included_markers, p_values, group_name): if __name__ == '__main__': import cPickle as pickle - import gzip - create_snp_iterator_file("HLC") diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py index e0007455..4460c06d 100644 --- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py +++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py @@ -38,20 +38,11 @@ from htmlgen import HTMLgen2 as HT from base import webqtlConfig from base.GeneralObject import GeneralObject -#from base.webqtlTrait import webqtlTrait -#from base.templatePage import templatePage from utility import webqtlUtil from utility import helper_functions from utility import Plot -#from utility.THCell import THCell -#from utility.TDCell import TDCell from wqflask.interval_analyst import GeneUtil - -#from dbFunction import webqtlDatabaseFunction - -#import logging -#logging.basicConfig(filename="/tmp/gn_leiyan.log", level=logging.INFO) -#_log = logging.getLogger("gn\web\webqtl\intervalMapping\IntervalMappingPage.py") +from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR, GENERATED_IMAGE_DIR ######################################### # Inteval Mapping Plot Page @@ -178,8 +169,8 @@ class MarkerRegression(object): self.species = start_vars['species'] #Needing for form submission when doing single chr mapping or remapping after changing options - self.vals = start_vars['vals'] - self.mapping_method = start_vars['mapping_method'] + self.vals = start_vars['vals'] + self.mapping_method = start_vars['mapping_method'] if self.mapping_method == "rqtl_geno": self.mapmethod_rqtl_geno = start_vars['method'] self.mapmodel_rqtl_geno = start_vars['model'] @@ -232,7 +223,7 @@ class MarkerRegression(object): self.significant = start_vars['significant'] else: self.nperm = 0 - + if 'bootCheck' in start_vars.keys(): self.bootChecked = start_vars['bootCheck'] else: @@ -308,7 +299,7 @@ class MarkerRegression(object): if 'showSNP' in start_vars.keys(): self.SNPChecked = start_vars['showSNP'] else: - self.SNPChecked = False + self.SNPChecked = False if 'showGenes' in start_vars.keys(): self.geneChecked = start_vars['showGenes'] else: @@ -321,7 +312,7 @@ class MarkerRegression(object): self.endMb = float(start_vars['endMb']) except: self.endMb = -1 - try: + try: self.lrsMax = float(start_vars['lrsMax']) except: self.lrsMax = 0 @@ -363,7 +354,7 @@ class MarkerRegression(object): self.ChrList.append((indChr.name, i)) - + self.ChrLengthMbList = g.db.execute(""" Select Length from Chr_Length, InbredSet @@ -517,20 +508,19 @@ class MarkerRegression(object): chrName = self.selectedChr # Draw the genes for this chromosome / region of this chromosome webqtldatabase = self.dataset.name - + if self.dataset.group.species == "mouse": - self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "mouse") + self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "mouse") elif self.dataset.group.species == "rat": self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "rat") - if self.geneCol and self.intervalAnalystChecked: ####################################################################### #Nick use GENEID as RefGene to get Literature Correlation Informations# #For Interval Mapping, Literature Correlation isn't useful, so skip it# #through set GENEID is None # ####################################################################### - + #GENEID = fd.formdata.getvalue('GeneId') or None GENEID = None @@ -538,7 +528,7 @@ class MarkerRegression(object): self.geneTable(self.geneCol, GENEID) #geneTable = self.geneTable(self.geneCol, GENEID) #geneTableContainer.append(geneTable) - + #mainfmName = webqtlUtil.genRandStr("fm_") #tableForm = HT.Form(cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name=mainfmName, submit=HT.Input(type='hidden')) #tableForm.append(HT.Input(name='FormID', value='', type='hidden')) @@ -553,22 +543,22 @@ class MarkerRegression(object): #else: showLocusForm = "" intCanvas = pid.PILCanvas(size=(self.graphWidth, self.graphHeight)) - gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) + gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm) self.gifmap = gifmap.__str__() #print("GIFMAP:", gifmap.__str__()) self.filename= webqtlUtil.genRandStr("Itvl_") - intCanvas.save(os.path.join(webqtlConfig.IMGDIR, self.filename), format='jpeg') + intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename), format='jpeg') intImg=HT.Image('/image/'+self.filename+'.png', border=0, usemap='#WebQTLImageMap') #Scales plot differently for high resolution if self.draw2X: intCanvasX2 = pid.PILCanvas(size=(self.graphWidth*2,self.graphHeight*2)) gifmapX2 = self.plotIntMapping(intCanvasX2, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm, zoom=2) - intCanvasX2.save(os.path.join(webqtlConfig.IMGDIR, self.filename+"X2"), format='png') + intCanvasX2.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename+"X2"), format='png') #DLintImgX2=HT.Href(text='Download',url = '/image/'+self.filename+'X2.png', Class='smallsize', target='_blank') - + #textUrl = self.writeQTL2Text(fd, self.filename) ################################################################ @@ -598,7 +588,7 @@ class MarkerRegression(object): showLocusForm.append(intImg) else: showLocusForm = intImg - + if self.permChecked and self.nperm > 0 and not self.multipleInterval and 0 < self.nperm: self.perm_filename = self.drawPermutationHistogram() #perm_text_file = self.permutationTextFile() @@ -668,7 +658,7 @@ class MarkerRegression(object): TD_LR.append(HT.Blockquote(tableForm)) self.body = TD_LR - + #self.dict['body'] = TD_LR #self.dict['title'] = "Mapping" @@ -684,7 +674,7 @@ class MarkerRegression(object): fpText.write("Source: WebQTL, The GeneNetwork (%s)\n" % webqtlConfig.PORTADDR) # - fpText.write("Site: %s\n" % webqtlConfig.SITENAME) + fpText.write("Site: GN\n") fpText.write("Page: Map Viewer\n") fpText.write(time.strftime("Date and Time (US Center): %b %d, %Y at %I.%M %p\n", time.localtime())) fpText.write("Trait ID: %s\n" % self.this_trait.name) @@ -859,7 +849,7 @@ class MarkerRegression(object): BootCoord = [] i = 0 startX = xLeftOffset - + if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes for j, _chr in enumerate(self.genotype): BootCoord.append( []) @@ -882,8 +872,8 @@ class MarkerRegression(object): else: Xc = startX + (_locus.cM-_chr[0].cM)*plotXScale BootCoord[-1].append([Xc, self.bootResult[i]]) - i += 1 - + i += 1 + #reduce bootResult if self.selectedChr > -1: maxBootBar = 80.0 @@ -1412,7 +1402,7 @@ class MarkerRegression(object): if _strains[ii] in self.dataset.group.samplelist: temp = GeneralObject(name=_strains[ii], value=_val) smd.append(temp) - + smd.sort(lambda A, B: cmp(A.value, B.value)) smd.reverse() @@ -1567,14 +1557,14 @@ class MarkerRegression(object): firstGene = 0 else: lastGene = 0 - + for j, _geno in enumerate (self.genotype[0][1].genotype): plotbxd=0 for item in smd: if item.name == samplelist[j]: - plotbxd=1 - + plotbxd=1 + if (plotbxd == 1): ind = 0 @@ -1621,28 +1611,28 @@ class MarkerRegression(object): currentChromosome = self.genotype[0].name i = 0 - + paddingTop = yTopOffset ucscPaddingTop = paddingTop + self.WEBQTL_BAND_HEIGHT + self.BAND_SPACING ensemblPaddingTop = ucscPaddingTop + self.UCSC_BAND_HEIGHT + self.BAND_SPACING - + if zoom == 1: for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep): - + calBase = self.kONE_MILLION*(startMb + (endMb-startMb)*(pixel-xLeftOffset-0.0)/plotWidth) - + xBrowse1 = pixel xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1)) - + WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop+self.WEBQTL_BAND_HEIGHT)) bandWidth = xBrowse2 - xBrowse1 WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase-webqtlZoomWidth))/1000000.0, (calBase+webqtlZoomWidth)/1000000.0) - + WEBQTL_TITLE = "Click to view this section of the genome in WebQTL" gifmap.areas.append(HT.Area(shape='rect',coords=WEBQTL_COORDS,href=WEBQTL_HREF, title=WEBQTL_TITLE)) canvas.drawRect(xBrowse1, paddingTop, xBrowse2, (paddingTop + self.WEBQTL_BAND_HEIGHT), edgeColor=self.CLICKABLE_WEBQTL_REGION_COLOR, fillColor=self.CLICKABLE_WEBQTL_REGION_COLOR) canvas.drawLine(xBrowse1, paddingTop, xBrowse1, (paddingTop + self.WEBQTL_BAND_HEIGHT), color=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR) - + UCSC_COORDS = "%d, %d, %d, %d" %(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.UCSC_BAND_HEIGHT)) if self.dataset.group.species == "mouse": UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr) @@ -1652,7 +1642,7 @@ class MarkerRegression(object): gifmap.areas.append(HT.Area(shape='rect',coords=UCSC_COORDS,href=UCSC_HREF, title=UCSC_TITLE)) canvas.drawRect(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.UCSC_BAND_HEIGHT), edgeColor=self.CLICKABLE_UCSC_REGION_COLOR, fillColor=self.CLICKABLE_UCSC_REGION_COLOR) canvas.drawLine(xBrowse1, ucscPaddingTop, xBrowse1, (ucscPaddingTop+self.UCSC_BAND_HEIGHT), color=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR) - + ENSEMBL_COORDS = "%d, %d, %d, %d" %(xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop+self.ENSEMBL_BAND_HEIGHT)) if self.dataset.group.species == "mouse": ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases) @@ -1767,7 +1757,7 @@ class MarkerRegression(object): ChrAInfo = [] preLpos = -1 distinctCount = 0.0 - + #if len(self.genotype) > 1: if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes for i, _chr in enumerate(self.genotype): @@ -1810,7 +1800,7 @@ class MarkerRegression(object): offsetA = -stepA lineColor = pid.lightblue startPosX = xLeftOffset - + for j, ChrInfo in enumerate(ChrAInfo): preLpos = -1 for i, item in enumerate(ChrInfo): @@ -1878,7 +1868,7 @@ class MarkerRegression(object): # lodm = self.LODFACTOR # else: # lodm = 1.0 - + #ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD if self.lrsMax <= 0: #sliding scale if "lrs_value" in self.qtlresults[0]: @@ -1891,7 +1881,7 @@ class MarkerRegression(object): else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) else: pass else: @@ -1904,10 +1894,10 @@ class MarkerRegression(object): else: if self.permChecked and self.nperm > 0 and not self.multipleInterval: self.significant = min(self.significant, webqtlConfig.MAXLRS) - self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) + self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS) else: pass - + if self.permChecked and self.nperm > 0 and not self.multipleInterval: LRS_LOD_Max = max(self.significant, LRS_LOD_Max) @@ -1924,7 +1914,7 @@ class MarkerRegression(object): LRSScale = 2.5 else: LRSScale = 1.0 - + LRSAxisList = Plot.frange(LRSScale, LRS_LOD_Max, LRSScale) #make sure the user's value appears on the y-axis #update by NL 6-21-2011: round the LOD value to 100 when LRS_LOD_Max is equal to 460 @@ -1954,7 +1944,7 @@ class MarkerRegression(object): #"Significant" and "Suggestive" Drawing Routine # ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...) - + #ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist): rightEdge = int(start_pos_x + chr_length_dist*plotXScale - self.SUGGESTIVE_WIDTH/1.5) @@ -1977,13 +1967,13 @@ class MarkerRegression(object): start_pos_x += (chr_length_dist+self.GraphInterval)*plotXScale return start_pos_x - + for i, _chr in enumerate(self.genotype): if self.selectedChr != -1: if _chr.name == self.ChrList[self.selectedChr][0]: startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[0]) break - else: + else: continue else: startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[i]) @@ -1998,7 +1988,7 @@ class MarkerRegression(object): #else: # dominanceMax = -1 lrsEdgeWidth = 2 - + if zoom == 2: lrsEdgeWidth = 2 * lrsEdgeWidth @@ -2019,7 +2009,7 @@ class MarkerRegression(object): if qtlresult['chr'] != previous_chr and self.selectedChr == -1: if self.manhattan_plot != True: canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) - + if not self.multipleInterval and self.additiveChecked: plusColor = self.ADDITIVE_COLOR_POSITIVE minusColor = self.ADDITIVE_COLOR_NEGATIVE @@ -2049,7 +2039,7 @@ class MarkerRegression(object): canvas.drawLine(Xc0, Yc0, Xc, Yc, color=plusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) else: canvas.drawLine(Xc0, yZero - (Yc0-yZero), Xc, yZero - (Yc-yZero), color=minusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) - + LRSCoordXY = [] AdditiveCoordXY = [] previous_chr = qtlresult['chr'] @@ -2062,7 +2052,7 @@ class MarkerRegression(object): #startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale - #for j, _chr in enumerate(self.genotype): + #for j, _chr in enumerate(self.genotype): #ZS: This is beause the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used if self.plotScale == "physic": this_chr = str(self.ChrList[self.selectedChr][0]) @@ -2126,7 +2116,7 @@ class MarkerRegression(object): if self.manhattan_plot != True: canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) - + if not self.multipleInterval and self.additiveChecked: plusColor = self.ADDITIVE_COLOR_POSITIVE minusColor = self.ADDITIVE_COLOR_NEGATIVE @@ -2156,7 +2146,7 @@ class MarkerRegression(object): canvas.drawLine(Xc0, Yc0, Xc, Yc, color=plusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) else: canvas.drawLine(Xc0, yZero - (Yc0-yZero), Xc, yZero - (Yc-yZero), color=minusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) - + if not self.multipleInterval and INTERCROSS and self.dominanceChecked: plusColor = self.DOMINANCE_COLOR_POSITIVE minusColor = self.DOMINANCE_COLOR_NEGATIVE @@ -2186,7 +2176,7 @@ class MarkerRegression(object): canvas.drawLine(Xc0, Yc0, Xc, Yc, color=plusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) else: canvas.drawLine(Xc0, yZero - (Yc0-yZero), Xc, yZero - (Yc-yZero), color=minusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth)) - + ###draw additive scale if not self.multipleInterval and self.additiveChecked: @@ -2222,7 +2212,7 @@ class MarkerRegression(object): if zoom == 2: fontZoom = 1.5 yTopOffset += 30 - + #calculate plot scale if self.plotScale != 'physic': self.ChrLengthDistList = self.ChrLengthCMList @@ -2589,13 +2579,13 @@ class MarkerRegression(object): perm_output = [value/4.16 for value in self.perm_output] else: perm_output = self.perm_output - + Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, YLabel='Frequency', title=' Histogram of Permutation Test') filename= webqtlUtil.genRandStr("Reg_") - myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif') - + myCanvas.save(GENERATED_IMAGE_DIR+filename, format='gif') + return filename - + # img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test') # self.suggestive = self.perm_output[int(self.nperm*0.37-1)] @@ -2609,9 +2599,9 @@ class MarkerRegression(object): # permutation.append(HT.TR(HT.TD(img)), # HT.TR(HT.TD('')), # HT.TR(HT.TD('Total of %d permutations'%self.nperm))) - + # return permutation - + def permutationTextFile(self): filename= webqtlUtil.genRandStr("Reg_") fpText = open('%s.txt' % (webqtlConfig.TMPDIR+filename), 'wb') @@ -2624,12 +2614,12 @@ class MarkerRegression(object): '    Significant LRS =%3.2f\n'%self.significant, HT.BR(), '    Highly Significant LRS =%3.2f\n' % self.highlysignificant) - + for lrs_value in self.perm_output: fpText.write(str(lrs_value) + "\n") - + textUrl = HT.Href(text = 'Download Permutation Results', url= '/tmp/'+filename+'.txt', target = "_blank", Class='fs12 fwn') - + return textUrl def geneTable(self, geneCol, refGene=None): @@ -3039,4 +3029,4 @@ class MarkerRegression(object): sortby = ("", "") - return sortby \ No newline at end of file + return sortby diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py index a57bfffe..9941a4d3 100755 --- a/wqflask/wqflask/search_results.py +++ b/wqflask/wqflask/search_results.py @@ -24,9 +24,8 @@ from flask import Flask, g from MySQLdb import escape_string as escape # Instead of importing HT we're going to build a class below until we can eliminate it -from htmlgen import HTMLgen2 as HT +# from htmlgen import HTMLgen2 as HT -from base import webqtlConfig from utility.benchmark import Bench from base.data_set import create_dataset from base.trait import GeneralTrait diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py index 156510bb..074c78bf 100755 --- a/wqflask/wqflask/show_trait/show_trait.py +++ b/wqflask/wqflask/show_trait/show_trait.py @@ -16,7 +16,6 @@ from base import webqtlConfig from base import webqtlCaseData from wqflask.show_trait.SampleList import SampleList from utility import webqtlUtil, Plot, Bunch, helper_functions -from utility.tools import pylmm_command, plink_command from base.trait import GeneralTrait from base import data_set from dbFunction import webqtlDatabaseFunction @@ -24,8 +23,8 @@ from basicStatistics import BasicStatisticsFunctions from pprint import pformat as pf -PYLMM_PATH,PYLMM_COMMAND = pylmm_command() -PLINK_PATH,PLINK_COMMAND = plink_command() +from utility.tools import flat_files +MAPPING_PATH = flat_files("mapping") ############################################### # @@ -34,8 +33,6 @@ PLINK_PATH,PLINK_COMMAND = plink_command() # ############################################## - - class ShowTrait(object): def __init__(self, kw): @@ -163,14 +160,14 @@ class ShowTrait(object): def get_mapping_methods(self): '''Only display mapping methods when the dataset group's genotype file exists''' def check_plink_gemma(): - if (os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".bed") and - os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".map")): + if (os.path.isfile(MAPPING_PATH+"/"+self.dataset.group.name+".bed") and + os.path.isfile(MAPPING_PATH+"/"+self.dataset.group.name+".map")): return True else: return False def check_pylmm_rqtl(): - if os.path.isfile(webqtlConfig.GENODIR+self.dataset.group.name+".geno") and (os.path.getsize(webqtlConfig.NEWGENODIR+self.dataset.group.name+".json") > 0): + if os.path.isfile(webqtlConfig.GENODIR+self.dataset.group.name+".geno") and (os.path.getsize(webqtlConfig.JSON_GENODIR+self.dataset.group.name+".json") > 0): return True else: return False diff --git a/wqflask/wqflask/static/css b/wqflask/wqflask/static/css deleted file mode 120000 index 9d8c2f68..00000000 --- a/wqflask/wqflask/static/css +++ /dev/null @@ -1 +0,0 @@ -../../../web/css \ No newline at end of file diff --git a/wqflask/wqflask/static/dbdoc/TODO.md b/wqflask/wqflask/static/dbdoc/TODO.md deleted file mode 100644 index c0a8bab7..00000000 --- a/wqflask/wqflask/static/dbdoc/TODO.md +++ /dev/null @@ -1 +0,0 @@ -TODO: Add all database documentation into this folder diff --git a/wqflask/wqflask/static/images b/wqflask/wqflask/static/images deleted file mode 120000 index 12f0f8b5..00000000 --- a/wqflask/wqflask/static/images +++ /dev/null @@ -1 +0,0 @@ -../../../web/images \ No newline at end of file diff --git a/wqflask/wqflask/static/javascript b/wqflask/wqflask/static/javascript deleted file mode 120000 index 5f58faf4..00000000 --- a/wqflask/wqflask/static/javascript +++ /dev/null @@ -1 +0,0 @@ -../../../web/javascript \ No newline at end of file diff --git a/wqflask/wqflask/static/new/css/corr_matrix.css b/wqflask/wqflask/static/new/css/corr_matrix.css index 495ca28c..cd2b0a80 100755 --- a/wqflask/wqflask/static/new/css/corr_matrix.css +++ b/wqflask/wqflask/static/new/css/corr_matrix.css @@ -3,7 +3,7 @@ -moz-transform: rotate(-90deg); -ms-transform: rotate(-90deg); -o-transform: rotate(-90deg); - trasnform: rotate(-90deg); + transform: rotate(-90deg); color: #000 font-size: 22px; height: 50px; diff --git a/wqflask/wqflask/templates/base.html b/wqflask/wqflask/templates/base.html index f25bf9f7..b4fdbd8e 100755 --- a/wqflask/wqflask/templates/base.html +++ b/wqflask/wqflask/templates/base.html @@ -151,7 +151,7 @@ <!--</p>--> <ul class="footer-links"> - <li><a href="http://atlas.uthsc.edu/mailman/listinfo/genenetwork" target="_blank">Join the mailing list</a></li> + <li><a href="http://listserv.uthsc.edu/mailman/listinfo/genenetwork-dev">Join the mailing list</a></li> <!--<li><a href="#">Friend us on facebook</a></li>--> <!--<li><a href="#">Follow us on twitter</a></li>--> </ul> diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html index f92d9984..c1563b9c 100755 --- a/wqflask/wqflask/templates/collections/view.html +++ b/wqflask/wqflask/templates/collections/view.html @@ -55,6 +55,18 @@ <input type="submit" class="btn btn-primary" value="WGCNA Analysis" /> </div> </form> + <form action="/ctl_setup" method="post"> + {% if uc %} + <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" /> + {% endif %} + <input type="hidden" name="trait_list" id="trait_list" value= " + {% for this_trait in trait_obs %} + {{ this_trait.name }}:{{ this_trait.dataset.name }}, + {% endfor %}" > + <div class="col-xs-2 controls"> + <input type="submit" class="btn btn-primary" value="CTL Analysis" /> + </div> + </form> <form action="/heatmap" method="post"> {% if uc %} <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" /> diff --git a/wqflask/wqflask/templates/ctl_results.html b/wqflask/wqflask/templates/ctl_results.html new file mode 100644 index 00000000..a5cb1c08 --- /dev/null +++ b/wqflask/wqflask/templates/ctl_results.html @@ -0,0 +1,47 @@ +{% extends "base.html" %} +{% block title %}CTL results{% endblock %} + +{% block content %} <!-- Start of body --> +<div class="container"> + <h1>CTL Results</h1> + {{(request.form['trait_list'].split(',')|length -1)}} phenotypes as input<br> + <h3>Network Figure</h3> + <a href="/tmp/{{ results['imgurl1'] }}"> + <img alt="Embedded Image" src="data:image/png;base64, + {% for elem in results['imgdata1'] -%} + {% print("%c"|format(elem)) %} + {%- endfor %} + " /></a> + <h3>CTL/QTL Plots for individual traits</h3> + {% for r in range(2, (request.form['trait_list'].split(',')|length +1)) %} + <a href="/tmp/{{ results['imgurl' + r|string] }}"> + <img width=100 height=100 alt="Embedded Image" src="data:image/png;base64, + {% for elem in results['imgdata' + r|string] -%} + {% print("%c"|format(elem)) %} + {%- endfor %} + " /></a> + {% endfor %} + <h3>Tabular results</h3> + <table width="80%"> + <tr><th>trait</th><th>marker</th><th>trait</th><th>LOD</th><th>dCor</th></tr> + significant CTL:<br> + {% for r in range(results['ctlresult'][0]|length) %} + <tr> + {% for c in range(results['ctlresult']|length) %} + <td> + {% if c > 2 %} + {{results['ctlresult'][c][r]|float|round(2)}} + {% else %} + {{results['ctlresult'][c][r]}} + {% endif %} + </td> + {% endfor %} + </tr> + {% endfor %} + </table> + + + + +</div> +{% endblock %} diff --git a/wqflask/wqflask/templates/ctl_setup.html b/wqflask/wqflask/templates/ctl_setup.html new file mode 100644 index 00000000..9c0d7bea --- /dev/null +++ b/wqflask/wqflask/templates/ctl_setup.html @@ -0,0 +1,65 @@ +{% extends "base.html" %} +{% block title %}WCGNA analysis{% endblock %} + +{% block content %} <!-- Start of body --> +<div class="container"> + <h1>CTL analysis parameters</h1> + {{(request.form['trait_list'].split(',')|length -1)}} phenotypes as input + +<form action="/ctl_results" method="post" class="form-horizontal"> + <input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}"> + + <div class="dropdown"> + <label for="Strategy">Strategy</label> + <div class="col-sm-10"> + <select name="strategy" id="strategy"> + <option value="Exact">Exact</option> + <option value="Full">Full</option> + <option value="Pairwise">Pairwise</option> + </select> + </div> + </div> + + <div class="dropdown"> + <label for="Permutations">Number of permutation (Used when strategy is Full or Pairwise)</label> + <div class="col-sm-10"> + <select name="nperm" id="nperm"> + <option value="100">100</option> + <option value="1000" selected="selected">1000</option> + <option value="10000">10000</option> + </select> + </div> + </div> + + <div class="dropdown"> + <label for="Coefficient">Type of correlation coefficient</label> + <div class="col-sm-10"> + <select name="parametric" id="parametric"> + <option value="False">Spearman</option> + <option value="True">Pearson</option> + </select> + </div> + </div> + + + <div class="dropdown"> + <label for="Significance">Significance level</label> + <div class="col-sm-10"> + <select name="significance" id="significance"> + <option value="0.1">0.1</option> + <option value="0.05" selected="selected">0.05</option> + <option value="0.001">0.001</option> + </select> + </div> + </div> + <br> + <div class="form-group"> + <div class="col-sm-10"> + <input type="submit" class="btn btn-primary" value="Run CTL using these settings" /> + </div> + </div> + +</form> + +</div> +{% endblock %} diff --git a/wqflask/wqflask/templates/index_page.html b/wqflask/wqflask/templates/index_page.html index 5d658bc7..5cc15682 100755 --- a/wqflask/wqflask/templates/index_page.html +++ b/wqflask/wqflask/templates/index_page.html @@ -143,7 +143,7 @@ highly expressed genes (15 to 16 log2 units) AND with peak <a href="http://www.genenetwork.org/glossary.html#L" target="_blank">LRS</a> linkage between 23 and 46.</li> - <li><b>RIF=mitochondrial</b> searches RNA databases for <a href="http://www.ncbi.nlm.nih.gov/projects/GeneRIF/GeneRIFhelp.html" target="_blank"> + <li><b>RIF=mitochondrial</b> searches RNA databases for <a href="https://en.wikipedia.org/wiki/GeneRIF" target="_blank"> GeneRIF</a> links.</li> <li><b>WIKI=nicotine</b> searches <a href="http://www.genenetwork.org/webqtl/main.py?FormID=geneWiki" target="_blank"> diff --git a/wqflask/wqflask/templates/marker_regression_gn1.html b/wqflask/wqflask/templates/marker_regression_gn1.html index d86d981e..978ea55f 100644 --- a/wqflask/wqflask/templates/marker_regression_gn1.html +++ b/wqflask/wqflask/templates/marker_regression_gn1.html @@ -144,14 +144,14 @@ <div class="tab-pane active" id="gn1_map"> <div class="qtlcharts"> {{ gifmap|safe }} - <img src="/static/output/{{ filename }}.jpeg" usemap="#WebQTLImageMap"> + <img src="/generated/{{ filename }}.jpeg" usemap="#WebQTLImageMap"> {% if additiveChecked|upper == "ON" %} <br> <span style="white-space: nowrap;">A positive additive coefficient (green line) indicates that {{ dataset.group.parlist[1] }} alleles increase trait values. In contrast, a negative additive coefficient (orange line) indicates that {{ dataset.group.parlist[0] }} alleles increase trait values.</span> {% endif %} {% if nperm > 0 and permChecked == "ON" %} <br><br> - <img src="/static/output/{{ perm_filename }}.gif"> + <img src="/generated/{{ perm_filename }}.gif"> <br><br> Total of {{ nperm }} permutations  <a href="javascript:export_perm_data();" target="_blank" >Download Permutation Results</a> <br> diff --git a/wqflask/wqflask/templates/wgcna_setup.html b/wqflask/wqflask/templates/wgcna_setup.html index 8ab8c4a0..b4a5730d 100644 --- a/wqflask/wqflask/templates/wgcna_setup.html +++ b/wqflask/wqflask/templates/wgcna_setup.html @@ -17,36 +17,36 @@ {% else %} <form action="/wgcna_results" method="post" class="form-horizontal"> <input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}"> - <div class="form-group"> + <div class="form-group"> <label for="SoftThresholds"> Soft threshold: </label> <div class="col-sm-10"> <input type="text" class="form-inline" name="SoftThresholds" id="SoftThresholds" value="1,2,3,4,5,6,7,8,9"> </div> </div> - <div class="form-group"> + <div class="form-group"> <label for="MinModuleSize"> Minimum module size: </label> <div class="col-sm-10"> <input type="text" class="form-inline" name="MinModuleSize" id="MinModuleSize" value="30"> </div> </div> - <div class="form-group"> + <div class="form-group"> <label for="TOMtype"> TOMtype: </label> <div class="col-sm-10"> <input type="text" class="form-inline" name="TOMtype" id="TOMtype" value="unsigned"> </div> - </div> - <div class="form-group"> + </div> + <div class="form-group"> <label for="mergeCutHeight"> mergeCutHeight: </label> <div class="col-sm-10"> <input type="text" class="form-inline" name="mergeCutHeight" id="mergeCutHeight" value="0.25"> </div> - </div> - <div class="form-group"> + </div> + <div class="form-group"> <div class="col-sm-10"> <input type="submit" class="btn btn-primary" value="Run WGCNA using these settings" /> </div> - </div> - </form> + </div> + </form> {% endif %} </div> {% endblock %} diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py index 3cdb9339..bd2fff50 100644 --- a/wqflask/wqflask/views.py +++ b/wqflask/wqflask/views.py @@ -30,7 +30,7 @@ import sqlalchemy from wqflask import app from flask import (render_template, request, make_response, Response, - Flask, g, config, jsonify, redirect, url_for) + Flask, g, config, jsonify, redirect, url_for, send_from_directory) from wqflask import search_results from wqflask import gsearch @@ -48,10 +48,13 @@ from wqflask.correlation_matrix import show_corr_matrix from wqflask.correlation import corr_scatter_plot from wqflask.wgcna import wgcna_analysis +from wqflask.ctl import ctl_analysis from utility import temp_data +from utility.tools import TEMPDIR from base import webqtlFormData +from base.webqtlConfig import GENERATED_IMAGE_DIR from utility.benchmark import Bench from pprint import pformat as pf @@ -96,7 +99,7 @@ def tmp_page(img_path): print("img_path:", img_path) initial_start_vars = request.form print("initial_start_vars:", initial_start_vars) - imgfile = open(webqtlConfig.TMPDIR + img_path, 'rb') + imgfile = open(GENERATED_IMAGE_DIR + img_path, 'rb') imgdata = imgfile.read() imgB64 = imgdata.encode("base64") bytesarray = array.array('B', imgB64) @@ -172,6 +175,10 @@ def docedit(): doc = docs.Docs(request.args['entry']) return render_template("docedit.html", **doc.__dict__) +@app.route('/generated/<filename>') +def generated_file(filename): + return send_from_directory(GENERATED_IMAGE_DIR,filename) + @app.route("/help") def help(): doc = docs.Docs("help") @@ -190,6 +197,18 @@ def wcgna_results(): result = wgcna.process_results(wgcnaA) # After the analysis is finished store the result return render_template("wgcna_results.html", **result) # Display them using the template +@app.route("/ctl_setup", methods=('POST',)) +def ctl_setup(): + print("In ctl, request.form is:", request.form) # We are going to get additional user input for the analysis + return render_template("ctl_setup.html", **request.form) # Display them using the template + +@app.route("/ctl_results", methods=('POST',)) +def ctl_results(): + print("In ctl, request.form is:", request.form) + ctl = ctl_analysis.CTL() # Start R, load the package and pointers and create the analysis + ctlA = ctl.run_analysis(request.form) # Start the analysis, a ctlA object should be a separate long running thread + result = ctl.process_results(ctlA) # After the analysis is finished store the result + return render_template("ctl_results.html", **result) # Display them using the template @app.route("/news") def news_route(): @@ -446,7 +465,7 @@ def marker_regression_page(): print("img_path:", img_path) initial_start_vars = request.form print("initial_start_vars:", initial_start_vars) - imgfile = open('/home/zas1024/tmp/' + img_path, 'rb') + imgfile = open(TEMPDIR + '/' + img_path, 'rb') imgdata = imgfile.read() imgB64 = imgdata.encode("base64") bytesarray = array.array('B', imgB64) @@ -475,7 +494,7 @@ def export_pdf(): svg_xml = request.form.get("data", "Invalid data") print("svg_xml:", svg_xml) filename = request.form.get("filename", "interval_map_pdf") - filepath = "/home/zas1024/gene/wqflask/output/"+filename + filepath = GENERATED_IMAGE_DIR+filename pdf_file = cairosvg.svg2pdf(bytestring=svg_xml) response = Response(pdf_file, mimetype="application/pdf") response.headers["Content-Disposition"] = "attachment; filename=%s"%filename diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py index 9e9f41bc..880a1cb2 100644 --- a/wqflask/wqflask/wgcna/wgcna_analysis.py +++ b/wqflask/wqflask/wgcna/wgcna_analysis.py @@ -6,7 +6,7 @@ import scipy as sp # SciPy import rpy2.robjects as ro # R Objects import rpy2.rinterface as ri -from base import webqtlConfig # For paths and stuff +from base.webqtlConfig import GENERATED_IMAGE_DIR from utility import webqtlUtil # Random number for the image import base64 @@ -127,8 +127,8 @@ class WGCNA(object): # The iconic WCGNA plot of the modules in the hanging tree self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png" - self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl'] - r_png(self.results['imgloc'], width=1000, height=600) + self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl'] + r_png(self.results['imgloc'], width=1000, height=600, type='cairo-png') mergedColors = self.r_labels2colors(network[1]) self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05) r_dev_off() | 
