aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--INSTALL.md89
-rw-r--r--MANIFEST.in5
-rw-r--r--README-wqflask.txt51
-rw-r--r--README.md35
-rwxr-xr-xbin/genenetwork244
-rw-r--r--doc/GUIX-Reproducible-from-source.org21
-rw-r--r--doc/GUIX-archive.org106
-rw-r--r--doc/README.org168
-rw-r--r--doc/database.org710
-rw-r--r--doc/new_variable_names.txt6
-rw-r--r--doc/notes_DA.txt10
-rw-r--r--doc/old/gn_installation_notes.txt (renamed from doc/gn_installation_notes.txt)4
-rw-r--r--doc/old/notes.txt (renamed from doc/notes.txt)0
-rw-r--r--doc/requirements.txt36
-rw-r--r--doc/todo.txt2
-rw-r--r--etc/default_settings.py29
-rwxr-xr-xsetup.py15
-rwxr-xr-xwqflask/base/data_set.py396
-rwxr-xr-xwqflask/base/webqtlConfig.py84
-rwxr-xr-xwqflask/base/webqtlFormData.py2
-rwxr-xr-xwqflask/basicStatistics/BasicStatisticsFunctions.py6
-rwxr-xr-xwqflask/maintenance/gen_select_dataset.py3
-rwxr-xr-xwqflask/maintenance/get_group_samplelists.py3
-rwxr-xr-xwqflask/runserver.py4
-rw-r--r--wqflask/utility/external.py9
-rw-r--r--wqflask/utility/genofile_parser.py100
-rw-r--r--wqflask/utility/tools.py189
-rwxr-xr-xwqflask/wqflask/correlation/show_corr_results.py14
-rwxr-xr-xwqflask/wqflask/correlation_matrix/show_corr_matrix.py1
-rwxr-xr-xwqflask/wqflask/ctl/__init__.py0
-rw-r--r--wqflask/wqflask/ctl/ctl_analysis.py194
-rwxr-xr-xwqflask/wqflask/database.py5
-rw-r--r--wqflask/wqflask/heatmap/heatmap.py7
-rwxr-xr-xwqflask/wqflask/interval_analyst/IntervalAnalystPage.py138
-rwxr-xr-xwqflask/wqflask/marker_regression/MarkerRegressionPage.py6
-rw-r--r--wqflask/wqflask/marker_regression/gemma_mapping.py11
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression.py87
-rw-r--r--wqflask/wqflask/marker_regression/marker_regression_gn1.py129
-rwxr-xr-xwqflask/wqflask/search_results.py3
-rwxr-xr-xwqflask/wqflask/show_trait/show_trait.py13
l---------wqflask/wqflask/static/css1
-rw-r--r--wqflask/wqflask/static/dbdoc/TODO.md1
l---------wqflask/wqflask/static/images1
l---------wqflask/wqflask/static/javascript1
-rwxr-xr-xwqflask/wqflask/static/new/css/corr_matrix.css2
-rwxr-xr-xwqflask/wqflask/templates/collections/view.html12
-rw-r--r--wqflask/wqflask/templates/ctl_results.html47
-rw-r--r--wqflask/wqflask/templates/ctl_setup.html65
-rw-r--r--wqflask/wqflask/templates/marker_regression_gn1.html2
-rw-r--r--wqflask/wqflask/templates/wgcna_setup.html18
-rw-r--r--wqflask/wqflask/views.py27
-rw-r--r--wqflask/wqflask/wgcna/wgcna_analysis.py4
53 files changed, 2029 insertions, 888 deletions
diff --git a/.gitignore b/.gitignore
index 35be2d7b..4586a167 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,7 +6,6 @@
*~
web/new_genotypes/HSNIH.json
wqflask/secure_server.py
-wqflask/base/webqtlConfig.py
build/
dist/
*.egg-info/
diff --git a/INSTALL.md b/INSTALL.md
deleted file mode 100644
index b8c9c977..00000000
--- a/INSTALL.md
+++ /dev/null
@@ -1,89 +0,0 @@
-# INSTALL Genenetwork2 (GN2)
-
-## Use a Docker image
-
-A Docker image can be generated from
-[here](https://github.com/lomereiter/gn2-docker).
-
-## Fetch GN2 from github
-
-Clone the repository (currently ~800Mb) to local
-
- git clone git@github.com:genenetwork2/genenetwork2.git
-
-## Dependencies
-
-GN2 requires
-
-* python
-* redis-server
-* mysql-server
-
-## Required python modules
-
-Install the following python modules (it is probably wise to use a local
-Python with environment for this)
-
-* Flask
-* pyyaml
-* redis
-* qtlreaper
-* numarray
-* pp
-* Flask-SQLAlchemy
-
-## Set up local file settings.py
-
-```python
-LOGFILE = """/tmp/flask_gn_log"""
-
-#This is needed because Flask turns key errors into a
-#400 bad request response with no exception/log
-TRAP_BAD_REQUEST_ERRORS = True
-
-DB_URI = """mysql://gn2:password@localhost/db_webqtl"""
-SQLALCHEMY_DATABASE_URI = 'mysql://gn2:password@localhost/db_webqtl'
-
-# http://pythonhosted.org/Flask-Security/configuration.html
-SECURITY_CONFIRMABLE = True
-SECURITY_TRACKABLE = True
-SECURITY_REGISTERABLE = True
-SECURITY_RECOVERABLE = True
-
-SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org"
-SECURITY_POST_LOGIN_VIEW = "/thank_you"
-SQLALCHEMY_POOL_RECYCLE = 3600
-
-SERVER_PORT = 5051
-
-SECRET_HMAC_CODE = '*'
-```
-
-```sh
-# Use a working copy of python
-export python=$HOME/ve27/bin/python
-export WQFLASK_SETTINGS=$HOME/settings.py
-source /home/pjotr/ve27/bin/activate
-cd genenetwork2/wqflask
-$python ./runserver.py
-
-or
-
-$python ./secure_server.py
-```
-
-## Running tools
-
-### pylmm
-
-To run pylmm check out the repository at https://github.com/genenetwork/pylmm_gn2.
-
-Next update the setting.py file to point at the tree
-
-GN2 can locate PYLMM through PYLMM_PATH in setting.py (or in ENV)
-
- PYLMM_PATH = '/home/test/opensource/python/pylmm_gn2'
-
-## Other information
-
-Check also the ./misc/ directory for settings
diff --git a/MANIFEST.in b/MANIFEST.in
index f4ea2316..bf23f9aa 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -17,10 +17,6 @@ include wqflask/wqflask/templates/new_security/*
include wqflask/wqflask/correlation/*
include wqflask/wqflask/interval_mapping/*
include wqflask/wqflask/interval_analyst/*
-include wqflask/wqflask/my_pylmm/*
-include wqflask/wqflask/my_pylmm/scripts/*
-include wqflask/wqflask/my_pylmm/pyLMM/*
-include wqflask/wqflask/my_pylmm/data/*
include wqflask/wqflask/static/*
include wqflask/wqflask/static/dbdoc/*
include wqflask/wqflask/static/new/*
@@ -260,3 +256,4 @@ include wqflask/flask_security/templates/security/email/*
include wqflask/dbFunction/*
include wqflask/basicStatistics/*
include wqflask/base/*
+include etc/*.py
diff --git a/README-wqflask.txt b/README-wqflask.txt
deleted file mode 100644
index 03e3a1cc..00000000
--- a/README-wqflask.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-This readme concerns the directory wqflask - an officially sanctioned fork of the main GeneNetwork
-code. It's still very early in the process - but we eventually want to port all of the code
-in GeneNetwork to Flask and Jinja2. For more information about the project in general, see
-the file README.md.
-
-For more information about the port to Flask, please keep reading.
-
-*************************
-
-Requirements:
-
-* Python 2.7
-
-* virtualenv 1.7.1.2 or later
-
-* Other python dependencies are listed in the file wqflask/requirements.txt
-
-**************************
-
-Installation:
-
-We highly recommend you create a virtual enviornment called ve27 in your home directory.
-
-
-Get into your home directory
-> cd ~
-
-Create a virtual environment
-> virtualenv ve27
-
-Activate the environment
-> source ~/ve27/bin/activate
-
-Install dependencies
-> pip install -r ~/gene/wqflask/requirements.txt
-(Or replace gene with the name of the directory holding your repository)
-
-**************************
-
-Running the program:
-
-Assuming your enviornment is activated (source ~/ve27/bin/activate) just run:
-
-> python ~/gene/wqflask/runserver.py
-
-The program as configured runs on port 5000 and does not serve static files.
-
-You'll have to run a webserver to serve pages on port 80 and to server the static files (or
-flask could also be configured to serve the static pages).
-
-A sample configuration file for nginx is in the directory: wqflask/other_config/nginx.conf
diff --git a/README.md b/README.md
index 3d95d05f..4fe739bf 100644
--- a/README.md
+++ b/README.md
@@ -1,28 +1,27 @@
-genenetwork on github (May 7, 2012 by Lei Yan and Rob Williams)
+# GENENETWORK
-www.genenetwork.org
+This repository contains the source code for the GeneNetwork server
+http://www.genenetwork.org/ (version 2 aka GN2).
-Released under Affero General Public License 3 (AGPLv3). See also
-LICENSE.txt
+## Install
-For background see: http://en.wikipedia.org/wiki/Genenetwork
+The recommended installation is with GNU Guix which allows you to
+deploy GN2 and dependencies as a self contained unit on any machine.
+The database can be run separately as well as the source tree (for
+developers). See the [installation docs](doc/README.org).
-WWW service initiated January, 1994 as The Portable Dictionary of the Mouse Genome and
-June 15, 2001 as WebQTL.
+## License
-This code and the main web service is currently operated by Lei Yan, Zachary Sloan,
-Arthur Centeno. Design and code by Xiaodong Zhou, Christian Fernandez, Sam Ockman, Ning Liu, Rudi Alberts,
-Elissa Chesler, Jintao Wang, Kenneth Manly, Robert W. Williams, and colleagues.
+The GeneNetwork2 source code is released under the Affero General
+Public License 3 (AGPLv3). See [LICENSE.txt](LICENSE.txt).
-Code and primary web service managed by Dr. Robert W. Williams and the University of Tennessee Health Science Center,
-Memphis TN, USA.
+## More information
-Email labwilliams@gmail.com or rwilliams@uthsc.edu
+For more information visit http://www.genenetwork.org/
-Older version available on SourceForge http://sourceforge.net/projects/genenetwork/
+## Contact
-Funded by the National Institutes of Health and
-University of Tennessee Center for Integrative and Translational Genomics
+IRC on #genenetwork on irc.freenode.net.
-
-===========
+Code and primary web service managed by Dr. Robert W. Williams and the
+University of Tennessee Health Science Center, Memphis TN, USA.
diff --git a/bin/genenetwork2 b/bin/genenetwork2
new file mode 100755
index 00000000..bbb2a19f
--- /dev/null
+++ b/bin/genenetwork2
@@ -0,0 +1,44 @@
+#! /bin/bash
+#
+# This will run the GN2 server (with default settings if none supplied).
+#
+# Environment settings can be used to preconfigure as well as a
+# settings.py file.
+
+# Absolute path to this script, e.g. /home/user/bin/foo.sh
+SCRIPT=$(readlink -f "$0")
+# Absolute path this script is in, thus /home/user/bin
+GN2_BASE_PATH=$(dirname $(dirname "$SCRIPT"))
+
+GN2_GUIX_PATH=$GN2_BASE_PATH/lib/python2.7/site-packages/genenetwork2-2.0-py2.7.egg
+if [ -d $GN2_GUIX_PATH ]; then
+ GN2_BASE_PATH=$GN2_GUIX_PATH
+fi
+echo $GN2_BASE_PATH
+
+# Handle settings parameter
+settings=$1
+if [ -z $settings ]; then settings=$GN2_BASE_PATH/etc/default_settings.py ; fi
+if [ ! -e $settings ]; then
+ echo "ERROR: can not locate settings file - pass it in the command line"
+ exit 1
+fi
+export WQFLASK_SETTINGS=$settings
+
+# We may change this one:
+export PYTHONPATH=$GN2_BASE_PATH/wqflask:$PYTHONPATH
+
+# TEMPDIR defaults to /tmp if nothing else
+if [ -z $TEMPDIR ]; then
+ TEMPDIR="/tmp"
+fi
+
+# Start the redis server
+echo -n "dir $TEMPDIR
+dbfilename gn2.rdb
+" | redis-server - &
+
+# Start the flask server running GN2
+cd $GN2_BASE_PATH/wqflask
+echo "Starting with $settings"
+/usr/bin/env python runserver.py
diff --git a/doc/GUIX-Reproducible-from-source.org b/doc/GUIX-Reproducible-from-source.org
index b88eb9e8..4399ea26 100644
--- a/doc/GUIX-Reproducible-from-source.org
+++ b/doc/GUIX-Reproducible-from-source.org
@@ -4,6 +4,7 @@
- [[#introduction][Introduction]]
- [[#binary-deployment][Binary deployment]]
- [[#from-source-deployment][From source deployment]]
+ - [[#create-archive][Create archive]]
* Introduction
@@ -31,5 +32,23 @@ Next build guix (and run) following the instructions in [[https://github.com/pjo
Once that is done we can add the guix-bioinformatics path with
-: env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -A slurm
+: env GUIX_PACKAGE_PATH=../guix-bioinformatics command
+So
+
+#+begin_src sh :lang bash
+#+begin_src sh :lang bash
+gn-stable-guix$ env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -A genenetwork
+genenetwork1 1.0-d622c803b out ../guix-bioinformatics/gn/packages/bioinformatics.scm:163:2
+genenetwork2 2.0-9e9475053 out ../guix-bioinformatics/gn/packages/bioinformatics.scm:215:2
+#+end_src sh :lang bash
+
+Install with
+
+#+begin_src sh :lang bash
+gn-stable-guix$ env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -i genenetwork2
+#+end_src sh :lang bash
+
+* Create archive
+
+: env GUIX_PACKAGE_PATH=../../genenetwork/guix-bioinformatics/ ./pre-inst-env guix archive --export -r genenetwork2 > guix_gn2-2.0-9e9475053.nar
diff --git a/doc/GUIX-archive.org b/doc/GUIX-archive.org
new file mode 100644
index 00000000..67ab5cd0
--- /dev/null
+++ b/doc/GUIX-archive.org
@@ -0,0 +1,106 @@
+* Binary deployment
+
+Note binary deployment is not working pending a few improvements
+to GNU Guix. See source deployment instead.
+
+** Install Guix using a tar ball
+
+GN can be deployed either as a binary tarball or as a GNU Guix
+package. First install GNU Guix following the instructions of the
+[[https://www.gnu.org/software/guix/manual/html_node/Binary-Installation.html#Binary-Installation][binary installation]] using a tar ball from [[https://www.gnu.org/software/guix/download/][here]].
+
+With guix-daemon running you should be able to install the hello
+package:
+
+: guix package -i hello
+
+** Fix locale
+
+You may want to
+
+#+begin_src sh :lang bash
+export GUIX_LOCPATH=$HOME/.guix-profile/lib/locale
+export LC_ALL=en_US.utf8
+#+end_src sh :lang bash
+
+** Authorize our archives
+
+Next add our archive key to guix (as root):
+
+#+begin_src scheme
+echo "(public-key
+ (ecc
+ (curve Ed25519)
+ (q #E9A95686D8437186302E07C7AB9BF3913F435026C2D389AF27D9C66FD6EBB649#)
+ )
+ )
+"|guix archive --authorize
+#+end_src scheme
+
+if you have trouble finding a suitable guix try
+
+: ls /gnu/store/*guix-*/bin/guix
+
+and you should be able to use this directly, e.g.
+
+: alias guix=/gnu/store/632msbms2yaldfnlrb5lbnlnmn9yjisw-guix-0.9.0/bin/guix
+: guix --version
+
+** Download and install the GN2 archive
+
+Find the archive on
+
+ http://files.genenetwork.org/software/
+
+download and install with
+
+#+begin_src bash
+guix archive --import < genenetwork2-data-hash.nar
+#+end_src bash
+
+and you should see a list of packages installing, e.g.
+
+#+begin_src bash
+importing path `/gnu/store/l1zs2drn3zdzl5ysjcmhibcpa35p9zfc-python2-mysqlclient-1.3.7'
+importing path `/gnu/store/n7kfg4knibvblggy8ci2liscl7vz5wkg-python2-parallel-1.6.4'
+importing path `/gnu/store/qvv16qwlq59gp5d07lwbf5n8ndsi3il3-python2-sqlalchemy-1.0.11'
+importing path `/gnu/store/qw872mbmr9ir0a9drv9xw9pvjk05ywwy-python2-xlsxwriter-0.8.4'
+importing path `/gnu/store/wc112m1xfy3p08v14bdzay2ki2rirdsm-pylmm-gn2-1.0-3c6d1cac8'
+importing path `/gnu/store/zfkcy17c2ks3cd9ks14irdabqvmlfpyn-python2-flask-sqlalchemy-2.1'
+importing path `/gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff4'
+(etc.)
+#+end_src bash
+
+The following packages need to be added and the R path set
+
+: export R_LIBS_SITE="/home/wrk/.guix-profile/site-library/"
+: guix package -i /gnu/store/w0dqg9dshq53j8xhcnqgvnvms2s6y5k5-r-wgcna-1.49-425bc170cc0873ddbd414675ac40f6d4d724c7cb
+: guix package -i /gnu/store/k60bdlm0v7xic88j2z5c1jb1jvc371mn-r-qtl-1.38-4
+
+You can add the last one to your profile
+
+: guix package -i /gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff
+: export PATH=~/.guix-profile/bin:$PATH
+: genenetwork2
+
+ or run it directly with
+
+: /gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff/bin/genenetwork2
+
+
+
+** Other
+
+Update guix with a 'guix pull' and make guix visible in the path.
+More information exists also in my [[https://github.com/pjotrp/guix-notes/blob/master/INSTALL.org][guix-notes]].
+
+With guix running you should be able to install python, for example.
+
+: guix package -i python2
+
+This will make python appear in $HOME/.guix-profile/bin/python. Suggested
+environment settings can be seen with
+
+: guix package --search-paths
+
+
diff --git a/doc/README.org b/doc/README.org
index f6ab6a52..3b96717f 100644
--- a/doc/README.org
+++ b/doc/README.org
@@ -1,28 +1,166 @@
-#+TITLE: Installing GeneNetwork services with GNU Guix
+#+TITLE: Installing GeneNetwork services
* Table of Contents :TOC:
- [[#introduction][Introduction]]
- - [[#binary-deployment][Binary deployment]]
- - [[#from-source-deployment][From source deployment]]
+ - [[#source-deployment][Source deployment]]
+ - [[#install-guix][Install guix]]
+ - [[#checkout-the-git-repositories][Checkout the git repositories]]
+ - [[#update-guix][Update guix]]
+ - [[#install-gn2][Install GN2]]
+ - [[#run-gn2][Run GN2]]
+ - [[#run-mysql-server][Run MySQL server]]
+ - [[#run-your-own-copy-of-gn2][Run your own copy of GN2]]
+ - [[#source-deployment-and-other-information-on-reproducibility][Source deployment and other information on reproducibility]]
* Introduction
-Large system deployments tend to get very complex. In this document we
-explain the GeneNetwork deployment system which is based on GNU Guix
-(see Pjotr's [[https://github.com/pjotrp/guix-notes/blob/master/README.md][Guix-notes]]).
+Large system deployments can get very complex. In this document we
+explain the GeneNetwork version 2 (GN2) reproducible deployment system
+which is based on GNU Guix (see also Pjotr's [[https://github.com/pjotrp/guix-notes/blob/master/README.md][Guix-notes]]). The Guix
+system can be used to install GN with all its files and dependencies.
-* Binary deployment
+The official installation path is from a checked out version of the
+main Guix package tree and that of the Genenetwork package
+tree. Current supported versions can be found as the SHA values of
+'gn-latest' branches of [[https://github.com/genenetwork/guix-bioinformatics/tree/gn-latest][Guix bioinformatics]] and [[https://github.com/genenetwork/guix/tree/gn-latest][GNU Guix main]].
-NYA
+* Source deployment
+** Install guix
-* From source deployment
+Deploying from source is also straightforward. Install GNU Guix using
+a binary tar ball as described [[https://github.com/pjotrp/guix-notes][here]].
-GNU Guix allows for [[https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org][reproducible deployment]] based on a checked out
-Guix repository - use gn-stable for that:
+If it works you should be able to install a package with
-#+begin_src sh :lang bash
+: guix package -i hello
+
+** Checkout the git repositories
+
+Check out the two relevant guix and guix-bioinformatics git
+repositories:
+
+#+begin_src bash
+cd ~
mkdir genenetwork
cd genenetwork
-git checkout https://github.com/genenetwork/guix.git gn-stable-guix
-git checkout https://github.com/genenetwork/guix-bioinformatics.git
-#+end_src
+git clone --branch gn-latest https://github.com/genenetwork/guix-bioinformatics
+git clone --branch gn-latest --recursive https://github.com/genenetwork/guix guix-gn-latest
+cd guix-gn-latest
+#+end_src bash
+
+** Update guix
+
+At some point you may decide to create, install and run a recent
+version of the guix-daemon by compiling the guix repository. Follow
+[[https://github.com/pjotrp/guix-notes/blob/master/INSTALL.org#building-gnu-guix-from-source-using-guix][these]] steps carefully.
+
+** Install GN2
+
+#+begin_src bash
+env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env guix package -i genenetwork2 --fallback
+#+end_src bash
+
+Note that you can use the genenetwork.org guix substitute caching
+server at http://guix.genenetwork.org (which speeds up installs
+significantly because all packages are pre-built).
+
+Make a note of the paths with
+
+#+begin_src bash
+./pre-inst-env guix package --search-paths
+#+end_src bash
+
+** Run GN2
+
+After setting the paths for the server
+
+#+begin_src bash
+export PATH=~/.guix-profile/bin:$PATH
+export PYTHONPATH="$HOME/.guix-profile/lib/python2.7/site-packages"
+export R_LIBS_SITE="$HOME/.guix-profile/site-library/"
+export GUIX_GTK3_PATH="$HOME/.guix-profile/lib/gtk-3.0"
+export GI_TYPELIB_PATH="$HOME/.guix-profile/lib/girepository-1.0"
+export XDG_DATA_DIRS="$HOME/.guix-profile/share"
+export GIO_EXTRA_MODULES="$HOME/.guix-profile/lib/gio/modules"
+#+end_src bash
+
+run the main script (in ~/.guix-profile/bin)
+
+#+begin_src bash
+genenetwork2
+#+end_src bash
+
+will start the default server which listens on port 5003, i.e.,
+http://localhost:5003/.
+
+** Run MySQL server
+
+At this point we require the underlying distribution to install
+and run mysqld.
+
+Download one of
+
+http://files.genenetwork.org/raw_database/
+https://s3.amazonaws.com/genenetwork2/db_webqtl_s.zip
+
+Check the md5sum.
+
+After installation inflate the database binary in the MySQL directory
+(this is subject to change soon)
+
+: chown -R mysql:mysql db_webqtl_s/
+: chmod 700 db_webqtl_s/
+: chmod 660 db_webqtl_s/*
+
+restart MySQL service (mysqld). Login as root and
+
+: mysql> show databases;
+: +--------------------+
+: | Database |
+: +--------------------+
+: | information_schema |
+: | db_webqtl_s |
+: | mysql |
+: | performance_schema |
+: +--------------------+
+
+Set permissions and match password in your settings file below:
+
+: mysql> grant all privileges on db_webqtl_s.* to gn2@"localhost" identified by 'mysql_password';
+
+Note that if the mysql connection is not working, try connecting to
+the IP address and check server firewall, hosts.allow and mysql IP
+configuration.
+
+** Run your own copy of GN2
+
+At some point you may want to fix the source code. Assuming you have
+Guix and Genenetwork2 installed (as described above) clone the GN2
+repository from https://github.com/genenetwork/genenetwork2_diet
+
+Copy the paths into your terminal from (mainly so PYTHON_PATH and
+R_LIBS_SITE are set)
+
+: guix package --search-paths
+
+Inside the repository:
+
+: cd genenetwork2_diet
+: ./bin/genenetwork2
+
+Will fire up your local repo http://localhost:5003/ using the
+settings in ./etc/default_settings.py
+
+To override settings create your own from a copy of
+default_settings.py and pass it into GN2 with
+
+: ./bin/genenetwork2 $HOME/my_settings.py
+
+and everything *should* work (note the full path to the settings
+file). This way we develop against the exact same dependency graph of
+software.
+
+* Source deployment and other information on reproducibility
+
+See the document [[GUIX-Reproducible-from-source.org]].
+
diff --git a/doc/database.org b/doc/database.org
new file mode 100644
index 00000000..e06ac1ff
--- /dev/null
+++ b/doc/database.org
@@ -0,0 +1,710 @@
+- github Document reduction issue
+
+
+* GeneNetwork Database
+
+** Estimated table sizes
+
+
+select table_name,round(((data_length + index_length) / 1024 / 1024), 2) `Size in MB` from information_schema.TABLES where table_schema = "db_webqtl" order by data_length;
+
++-------------------------+------------+
+| table_name | Size in MB |
++-------------------------+------------+
+| ProbeSetData | 59358.80 |
+| SnpAll | 15484.67 |
+| ProbeData | 22405.44 |
+| SnpPattern | 9177.05 |
+| ProbeSetSE | 14551.02 |
+| QuickSearch | 5972.86 |
+| ProbeSetXRef | 4532.89 |
+| LCorrRamin3 | 18506.53 |
+| ProbeSE | 6263.83 |
+| ProbeSet | 2880.21 |
+| Probe | 2150.30 |
+| GenoData | 3291.91 |
+| CeleraINFO_mm6 | 989.80 |
+| pubmedsearch | 1032.50 |
+| ProbeXRef | 743.38 |
+| GeneRIF_BASIC | 448.54 |
+| BXDSnpPosition | 224.44 |
+| EnsemblProbe | 133.66 |
+| EnsemblProbeLocation | 105.49 |
+| Genbank | 37.71 |
+| TissueProbeSetData | 74.42 |
+| AccessLog | 42.38 |
+| GeneList | 34.11 |
+| Geno | 33.90 |
+| MachineAccessLog | 28.34 |
+| IndelAll | 22.42 |
+| PublishData | 22.54 |
+| TissueProbeSetXRef | 14.73 |
+| ProbeH2 | 13.26 |
+| GenoXRef | 22.83 |
+| TempData | 8.35 |
+| GeneList_rn3 | 5.54 |
+| GORef | 4.97 |
+| Phenotype | 6.50 |
+| temporary | 3.59 |
+| InfoFiles | 3.32 |
+| Publication | 3.42 |
+| Homologene | 5.69 |
+| Datasets | 2.31 |
+| GeneList_rn33 | 2.61 |
+| PublishSE | 4.71 |
+| GeneRIF | 2.18 |
+| Vlookup | 1.87 |
+| H2 | 2.18 |
+| PublishXRef | 2.18 |
+| NStrain | 4.80 |
+| IndelXRef | 2.91 |
+| Strain | 1.07 |
+| GeneMap_cuiyan | 0.51 |
+| user_collection | 0.30 |
+| CaseAttributeXRef | 0.44 |
+| StrainXRef | 0.56 |
+| GeneIDXRef | 0.77 |
+| Docs | 0.17 |
+| News | 0.17 |
+| ProbeSetFreeze | 0.22 |
+| GeneRIFXRef | 0.24 |
+| Sample | 0.06 |
+| login | 0.06 |
+| user | 0.04 |
+| TableFieldAnnotation | 0.05 |
+| DatasetMapInvestigator | 0.05 |
+| User | 0.04 |
+| ProbeFreeze | 0.06 |
+| TableComments | 0.02 |
+| Investigators | 0.02 |
+| DBList | 0.03 |
+| Tissue | 0.02 |
+| GeneChip | 0.01 |
+| GeneCategory | 0.01 |
+| SampleXRef | 0.01 |
+| InbredSet | 0.01 |
+| SnpAllele_to_be_deleted | 0.00 |
+| Organizations | 0.01 |
+| PublishFreeze | 0.00 |
+| GenoFreeze | 0.00 |
+| Chr_Length | 0.01 |
+| SnpSource | 0.00 |
+| AvgMethod | 0.00 |
+| Species | 0.00 |
+| Dataset_mbat | 0.00 |
+| TissueProbeFreeze | 0.00 |
+| EnsemblChip | 0.00 |
+| TissueProbeSetFreeze | 0.01 |
+| UserPrivilege | 0.00 |
+| CaseAttribute | 0.00 |
+| MappingMethod | 0.00 |
+| DBType | 0.00 |
+| InfoFilesUser_md5 | 0.00 |
+| GenoCode | 0.00 |
+| DatasetStatus | 0.00 |
+| GeneChipEnsemblXRef | 0.00 |
+| GenoSE | 0.00 |
+| user_openids | 0.00 |
+| roles_users | 0.00 |
+| role | 0.00 |
+| Temp | NULL |
++-------------------------+------------+
+97 rows in set, 1 warning (0.01 sec)
+
+All *Data tables are large
+
+** User access
+
+According to the meta data:
+
+This table tracks access time and IP addresses. Used for logging in
+registered users and tracking cookies.
+
+# GN1 uses access table and GN2 uses user table (true/false?)
+
+ select * from AccessLog limit 5;
++-------+---------------------+----------------+
+| id | accesstime | ip_address |
++-------+---------------------+----------------+
+| 12174 | 2003-10-28 02:17:41 | 130.120.104.71 |
+| 12173 | 2003-10-28 02:16:27 | 130.120.104.71 |
+| 3 | 2003-02-22 07:38:33 | 192.117.159.1 |
+| 4 | 2003-02-22 07:49:13 | 192.117.159.1 |
+| 5 | 2003-02-22 07:51:08 | 192.117.159.1 |
++-------+---------------------+----------------+
+
+select * from AccessLog order by accesstime desc limit 5;
++---------+---------------------+---------------+
+| id | accesstime | ip_address |
++---------+---------------------+---------------+
+| 1025735 | 2016-02-08 14:23:29 | 100.43.81.157 |
+| 1025734 | 2016-02-08 13:54:28 | 180.76.15.144 |
+| 1025733 | 2016-02-08 13:43:37 | 66.249.65.217 |
+| 1025732 | 2016-02-08 13:39:50 | 66.249.65.217 |
+| 1025731 | 2016-02-08 13:15:46 | 66.249.65.217 |
++---------+---------------------+---------------+
+
+Quite a few trait page hits:
+
+select count(*) from AccessLog;
+
++----------+
+| count(*) |
++----------+
+| 1025685 |
++----------+
+
+show indexes from AccessLog;
++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| AccessLog | 0 | PRIMARY | 1 | id | A | 1025685 | NULL | NULL | | BTREE | | |
++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+This table is being used by both GN1 and GN2 from the trait pages!
+
+: grep -ir AccessLog *|grep -e "^gn1\|^gn2"|grep \.py|grep -v doc
+
+gn1/web/webqtl/showTrait/ShowTraitPage.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
+gn1/web/webqtl/showTrait/ShowTraitPage.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)" ,user_ip)
+gn1/web/webqtl/textUI/cmdClass.py: query = """SELECT count(id) FROM AccessLog WHERE ip_address = %s AND UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400"""
+gn1/web/webqtl/textUI/cmdClass.py: query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)"""
+gn2/wqflask/wqflask/show_trait/show_trait_page.py: query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
+gn2/wqflask/wqflask/show_trait/show_trait_page.py: self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip)
+
+When looking at the code in GN1 and GN2 it restricts the daily use of
+the trait data page (set to 1,000 - whoever reaches that?). Unlike
+mentioned in the schema description, this table does *not* keep track
+of cookies.
+
+From the code it looks like GN2 uses a mixture of Redis and sqlalchemy
+to keep track of logged in sessions (see
+gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in
+model.py.
+
+In gn2/wqflask/wqflask/templates/collections/view_anonymous.html it
+show_trait_page appears to be loaded (need to check).
+
+** AvgMethod
+
+Probesetfreeze refers to AvgMethod
+
+** BXDSnPosition
+
+Snp table (all snps)
+
+Mapping in GN1 shows snps when you select a chromosome.
+
+** CaseAttribute(XRef)
+
+Metadata
+
+** CeleralINFO_mm6
+
+?
+
+** Chr_Length
+
+Default mm9, column for mm8
+
+** Dataset_mbat
+
+Menu for BXD (linkouts)
+
+** DatasetMapInvestigator
+
+Arthur?
+
+** DataSets
+
+Information/metadata
+
+** DatasetStatus
+
+Arthur private/public
+
+** DBList and DBType
+
+Hooked in API (URL encoding)
+
+** Docs
+
+GN2 only (see menu bar)
+
+** Ensembl*
+
+Probe information
+
+(will be deprecated)
+
+** Genbank
+
+Linkout and not important
+
+** GeneCategory
+
+Not important. GeneWiki notes function classification.
+
+Deprecate.
+
+** GeneChip
+
+** GeneIDXRef
+
+Interspecies gene comparison
+
+** GeneList
+
+Track info
+
+** Genlist_rn3(3)
+
+Rat list
+
+** GeneMap_cuiyan
+
+Link outs
+
+** GeneRIF
+
+Wiki info (nightly updated from NCBI)
+
+XRef should be foreign keys
+
+** Geno
+
+SNP or marker info
+
+** GenoCode
+
+Belongs to someone else
+
+** GenoData
+
+Allele info
+
+** GenoFreeze
+
+Big menu (Freeze refers to menu)
+
+** GenoSE
+
+SE standard err, not used
+
+** GenoXREF
+
+Very important. Key links between Geno, GenoData
+
+** GORef
+
+GO terms
+
+** H2
+
+Heritability for probeset(?)
+
+** Homologene
+
+Homology, not used much
+
+** InbredSet
+
+Group in menu
+
+** Indelall, SnpAll, SnpPattern, SnpSource
+
+Indel Snp browser (variant browser Gn1)
+
+** Info*
+
+Infra system PhP
+
+Data Info button
+
+Infosystem users has separate entries
+
+Also Investigators, User, Organizations,
+
+** LCorrRamin3
+
+Lit. Correlations Prof. Ramin
+
+** Login
+
+GN2 login info
+
+** MachineAccessLog
+
+Old
+
+** MappingMethod
+
+GN1
+
+** News
+
+GN2
+
+** NStrain
+
+pheno publishfreeze (menu)
+ xref (keys)
+ xref links to publish (pubmed), phenotype, pubishdata
+geno genofreeze
+ xref (keys)
+ xref links to publish (pubmed), genotype, genodata
+probeset/expr. probesetfreeze
+ xref (keys)
+ xref links to publish (pubmed), probeset, probesetdata
+probe/expr. probefreeze
+ xref (keys)
+ xref links to publish (pubmed), probe, probedata
+
+Each dataset has 3 values (real value (1), number of samples (2), stderr (3))
+
+NStrain = number of phenotype samples
+
+ProbesetFreeze contains all data, incl. metabolomic.
+
+** Phenotype
+
+This table contains names, full descriptions, and short symbols for
+traits and phenotype used primarily in the Published Phenotypes
+databases.
+
+Contains 10k rows, March 2016, of which 5000 are for the BXDs).
+
+| Id | Pre_publication_description | Post_publication_description | Original_description | Units | Pre_publication_abbreviation | Post_publication_abbreviation | Lab_code | Submitter | Owner | Authorized_Users |
++----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+
+| 1 | NULL | Hippocampus weight | Original post publication description: Hippocampus weight | Unknown | NULL | HPCWT | NULL | robwilliams | NULL | robwilliams |
+| 2 | NULL | Cerebellum weight | Original post publication description: Cerebellum weight | mg | NULL | CBLWT | NULL | robwilliams | NULL | robwilliams |
+| 3 | NULL | Interleukin 1 activity by peritoneal macrophages stimulated with 10 ug/ml lipopolysaccharide [units/100 ug protein] | Original post publication description: Interleukin 1 activity by peritoneal macrophages stimulated with 10 ug/ml lipopolysaccharide [units/100 ug protein] | units/100 ug protein | NULL | IL1Activity | NULL | robwilliams | NULL | robwilliams |
+| 4 | NULL | Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg] | Original post publication description: Cerebellum weight [mg] | mg | NULL | CBLWT2 | NULL | robwilliams | NULL | robwilliams |
+| 5 | NULL | The coat color of 79 BXD RI strain | Original post publication description: The coat color of 79 BXD RI strain | Unknown | NULL | CoatColor | NULL | robwilliams | NULL | robwilliams |
++----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+
+5 rows in set (0.00 sec)
+
+** ProbeData
+
+Table with fine-grained probe level Affymetrix data only. Contains 1
+billion rows March 2016. This table may be deletable since it is only
+used by the Probe Table display in GN1. Not used in GN2
+(double-check).
+
+In comparison the "ProbeSetData" table contains more molecular assay
+data, including probe set data, RNA-seq data, proteomic data, and
+metabolomic data. 2.5 billion rows March 2016. In comparison,
+ProbeData contains data only for Affymetrix probe level data
+(e.g. Exon array probes and M430 probes).
+
+"ProbeData.StrainId" should be "CaseId" or "SampleId".
+
+"ProbeData" should probably be "AssayData" or something more neutral.
+
+select * from ProbeData limit 2;
++--------+----------+---------+
+| Id | StrainId | value |
++--------+----------+---------+
+| 503636 | 42 | 11.6906 |
+| 503636 | 43 | 11.4205 |
++--------+----------+---------+
+2 rows in set (0.00 sec)
+
+select count(*) from ProbeData limit 2;
++-----------+
+| count(*) |
++-----------+
+| 976753435 |
++-----------+
+1 row in set (0.00 sec)
+
+** ProbeSet
+
+Comment: PLEASE CHANGE TABLE NAME and rework fields carefully. This is
+a terrible table but it works well (RWW March 2016). It is used in
+combination with the crucial TRAIT DATA and ANALYSIS pages in GN1 and
+GN2. It is also used by annotators using the UPDATE INFO AND DATA web
+form to correct and update annotation. It is used by Arthur to enter
+new annotation files and metadata for arrays, genes, proteins,
+metabolites. The main problem with this table is that it is doing too
+much work.
+
+Initially (2003) this table contained only Affymetrix ProbeSet data
+for mouse (U74aV2 initially). Many other array platforms for different
+species were added. At least four other major categories of molecular
+assays have been added since about 2010.
+
+1. RNA-seq annotation and sequence data for transcripts using ENSEMBL
+ identifiers or NCBI NM_XXXXX and NR_XXXXX type identifiers
+
+2. Protein and peptide annotation and sequence data (see BXD Liver
+ Proteome data, SRM and SWATH type data) with identifiers such as
+ "abcb10_q9ji39_t311" for SRM data and "LLGNMIVIVLGHHLGKDFTPAAQAA"
+ for SWATH data where the latter is just the peptide fragment that
+ has been quantified. Data first entered in 2015 for work by Rudi
+ Aebersold and colleagues.
+
+3. Metabolite annotation and metadata (see BXD Liver Metabolome data)
+ with identifiers that are usually Mass charge ratios such as
+ "149.0970810_MZ"
+
+4. Epigenomic and methylome data (e.g. Human CANDLE Methylation data
+ with identifiers such as "cg24523000")
+
+It would make good sense to break this table into four or more types
+of molecular assay metadata or annotation tables) (AssayRNA_Anno,
+AssayProtein_Anno, AssayMetabolite_Anno, AssayEpigenome_Anno,
+AssayMetagenome_Anno), since these assays will have many differences
+in annotation content compared to RNAs.
+
+Some complex logic is used to update contents of this table when
+annotators modify and correct the information (for example, updating
+gene symbols). These features requested by Rob so that annotating one
+gene symbol in one species would annotate all gene symbols in the same
+species based on common NCBI GeneID number. For example, changing the
+gene alias for one ProbeSet.Id will changing the list of aliases in
+all instances with the same gene symbol.
+
+If the ProbeSet.BlatSeq (or is this ProbSetTargetSeq) is identical
+between different ProbeSet.Ids then annotation is forced to be the
+same even if the symbol or geneID is different. This "feature" was
+implemented when we found many probe sets with identical sequence but
+different annotations and identifiers.
+
+
+select count(*) from ProbeSet limit 5;
++----------+
+| count(*) |
++----------+
+| 4351030 |
++----------+
+
+| Id | ChipId | Name | TargetId | Symbol | description | Chr | Mb | alias | GeneId | GenbankId | SNP | BlatSeq | TargetSeq | UniGeneId | Strand_Probe | Strand_Gene | OMIM | comments | Probe_set_target_region | Probe_set_specificity | Probe_set_BLAT_score | Probe_set_Blat_Mb_start | Probe_set_Blat_Mb_end | Probe_set_strand | Probe_set_Note_by_RW | flag | Symbol_H | description_H | chromosome_H | MB_H | alias_H | GeneId_H | chr_num | name_num | Probe_Target_Description | RefSeq_TranscriptId | Chr_mm8 | Mb_mm8 | Probe_set_Blat_Mb_start_mm8 | Probe_set_Blat_Mb_end_mm8 | HomoloGeneID | Biotype_ENS | ProteinID | ProteinName | Flybase_Id | HMDB_ID | Confidence | ChEBI_ID | ChEMBL_ID | CAS_number | PubChem_ID | ChemSpider_ID | UNII_ID | EC_number | KEGG_ID | Molecular_Weight | Nugowiki_ID | Type | Tissue | PrimaryName | SecondaryNames | PeptideSequence |
++------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+
+| 7282 | 1 | 93288_at | NULL | Arpc2 | actin related protein 2/3 complex, subunit 2 | 1 | 74.310961 | AK008777 | 76709 | AI835883 | 0 | CCGACTTCCTTAAGGTGCTCAACCGGACTGCTTGCTACTGGATAATCGTGAGGGATTCTCCATTTGGGTTCCATTTTGTACGAGTTTGGCAAATAACCTGCAGAAACGAGCTGTGCTTGCAAGGACTTGATAGTTCCTAATCCTTTTCCAAGCTGTTTGCTTTGCAATATGT | ccgacttccttaaggtgctcaaccgtnnnnnnccnannnnccnagaaaaaagaaatgaaaannnnnnnnnnnnnnnnnnnttcatcccgctaactcttgggaactgaggaggaagcgctgtcgaccgaagnntggactgcttgctactggataatcgtnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntgagggattctccatttgggttccattttgtacgagtttggcaaataacctgcagaaacgagctgtgcttgcaaggacttgatagttcctaagaattanaanaaaaaaaanaanttccacttgatcaanttaattcccttttatttttcctccctcantccccttccttttccaagctgtttgctttgcaatatgt | Mm.337038 | + | | 604224 | | NULL | 8.45 | 169 | 74.310961 | 74.31466 | NULL | NULL | 3 | NULL | NULL | NULL | NULL | NULL | NULL | 1 | 93288 | NULL | XM_129773 | 1 | 74.197594 | 74.197594 | 74.201293 | 4187 | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
++------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+
+2 rows in set (0.00 sec)
+
+
+
+
+** ProbeSetData
+
+Probedata - main molecular data. Probesets, metabolome,
+
+Almost all important molecular assay data is in this table including
+probe set data, RNA-seq data, proteomic data, and metabolomic
+data. 2.5 billion rows March 2016. In comparison, ProbeData contains
+data only for Affymetrix probe level data (e.g. Exon array probes and
+M430 probes).
+
+select count(*) from ProbeSetData limit 5;
++---------------+
+| count(*) |
++---------------+
+| 2,510,566,472 |
++---------------+
+
+
+select * from ProbeSetData limit 5;
++----+----------+-------+
+| Id | StrainId | value |
++----+----------+-------+
+| 1 | 1 | 5.742 |
+| 1 | 2 | 5.006 |
+| 1 | 3 | 6.079 |
+| 1 | 4 | 6.414 |
+| 1 | 5 | 4.885 |
++----+----------+-------+
+
+show indexes from ProbeSetData;
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetData | 0 | DataId | 1 | Id | A | 34868978 | NULL | NULL | | BTREE | | |
+| ProbeSetData | 0 | DataId | 2 | StrainId | A | 2510566472 | NULL | NULL | | BTREE | | |
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+select * from Strain limit 5;
++----+----------+----------+-----------+--------+-------+
+| Id | Name | Name2 | SpeciesId | Symbol | Alias |
++----+----------+----------+-----------+--------+-------+
+| 1 | B6D2F1 | B6D2F1 | 1 | NULL | NULL |
+| 2 | C57BL/6J | C57BL/6J | 1 | B6J | NULL |
+| 3 | DBA/2J | DBA/2J | 1 | D2J | NULL |
+| 4 | BXD1 | BXD1 | 1 | NULL | NULL |
+| 5 | BXD2 | BXD2 | 1 | NULL | NULL |
++----+----------+----------+-----------+--------+-------+
+
+show indexes from Strain;
++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Strain | 0 | PRIMARY | 1 | Id | A | 14368 | NULL | NULL | | BTREE | | |
+| Strain | 0 | Name | 1 | Name | A | 14368 | NULL | NULL | YES | BTREE | | |
+| Strain | 0 | Name | 2 | SpeciesId | A | 14368 | NULL | NULL | | BTREE | | |
+| Strain | 1 | Symbol | 1 | Symbol | A | 14368 | NULL | NULL | YES | BTREE | | |
++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+A typical query may look like
+
+SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+ FROM (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+ left join ProbeSetSE on
+ (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+ WHERE
+ ProbeSetFreeze.name = 'B139_K_1206_M' AND
+ ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+ ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+ ProbeSetXRef.DataId = ProbeSetData.Id AND
+ ProbeSetData.StrainId = Strain.Id
+ Order BY Strain.Name
+
++-------+-------+-------+----------+
+| Name | value | error | Id |
++-------+-------+-------+----------+
+| SM001 | 38.3 | NULL | 25309550 |
+| SM001 | 2.7 | NULL | 25309520 |
+| SM001 | 20.3 | NULL | 25309507 |
+| SM001 | 125.8 | NULL | 25309511 |
+| SM001 | 8.2 | NULL | 25309534 |
++-------+-------+-------+----------+
+5 rows in set (22.28 sec)
+
+select * from ProbeSetFreeze limit 5;
++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+
+| Id | ProbeFreezeId | AvgID | Name | Name2 | FullName | ShortName | CreateTime | OrderList | public | confidentiality | AuthorisedUsers | DataScale |
++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+
+| 1 | 3 | 1 | Br_U_0803_M | BXDMicroArray_ProbeSet_August03 | UTHSC Brain mRNA U74Av2 (Aug03) MAS5 | Brain U74Av2 08/03 MAS5 | 2003-08-01 | NULL | 0 | 0 | NULL | log2 |
+| 2 | 10 | 1 | Br_U_0603_M | BXDMicroArray_ProbeSet_June03 | UTHSC Brain mRNA U74Av2 (Jun03) MAS5 | Brain U74Av2 06/03 MAS5 | 2003-06-01 | NULL | 0 | 0 | NULL | log2 |
+| 3 | 8 | 1 | Br_U_0303_M | BXDMicroArray_ProbeSet_March03 | UTHSC Brain mRNA U74Av2 (Mar03) MAS5 | Brain U74Av2 03/03 MAS5 | 2003-03-01 | NULL | 0 | 0 | NULL | log2 |
+| 4 | 5 | 1 | Br_U_0503_M | BXDMicroArray_ProbeSet_May03 | UTHSC Brain mRNA U74Av2 (May03) MAS5 | Brain U74Av2 05/03 MAS5 | 2003-05-01 | NULL | 0 | 0 | NULL | log2 |
+| 5 | 4 | 1 | HC_U_0303_M | GNFMicroArray_ProbeSet_March03 | GNF Hematopoietic Cells U74Av2 (Mar03) MAS5 | GNF U74Av2 03/03 MAS5 | 2003-03-01 | NULL | 0 | 0 | NULL | log2 |
++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+
+
+ select * from ProbeSetXRef limit 5;
++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+
+| ProbeSetFreezeId | ProbeSetId | DataId | Locus_old | LRS_old | pValue_old | mean | se | Locus | LRS | pValue | additive | h2 |
++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+
+| 1 | 1 | 1 | 10.095.400 | 13.3971627898894 | 0.163 | 5.48794285714286 | 0.08525787814808819 | rs13480619 | 12.590069931048001 | 0.269 | -0.28515625 | NULL |
+| 1 | 2 | 2 | D15Mit189 | 10.042057464356201 | 0.431 | 9.90165714285714 | 0.0374686634976217 | CEL-17_50896182 | 10.5970737900941 | 0.304 | -0.11678333333333299 | NULL |
+| 1 | 3 | 3 | D5Mit139 | 5.43678531742749 | 0.993 | 7.83948571428571 | 0.0457583416912569 | rs13478499 | 6.0970532702754 | 0.988 | 0.112957489878542 | NULL |
+| 1 | 4 | 4 | D1Mit511 | 9.87815279480766 | 0.483 | 8.315628571428569 | 0.0470396593931327 | rs6154379 | 11.774867551173099 | 0.286 | -0.157113725490196 | NULL |
+| 1 | 5 | 5 | D16H21S16 | 10.191723834264499 | 0.528 | 9.19345714285714 | 0.0354801718293322 | rs4199265 | 10.923263374016202 | 0.468 | 0.11476470588235299 | NULL |
++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+
+
+
+Note that the following unlimited search is very slow:
+
+select max(value) from ProbeSetData;
+
++------------+
+| max(value) |
++------------+
+| 26436006 |
++------------+
+1 row in set (2 min 16.31 sec)
+
+which is in some form is used in the search page, see [[https://github.com/genenetwork/genenetwork2_diet/blob/master/wqflask/wqflask/do_search.py#L811][the search code]].
+
+
+*** Improvements?
+
+Suggestions on the schema page:
+
+"StrainId" should be "CaseId" or "SampleId".
+
+"ProbeSetData" should probably be "AssayData" or something more neutral.
+
+*** Comments
+
+I think the ProbeSetData table should be generalized to a 'phenotypes'
+table with an 'sample_id' column and a 'value' column.
+
+A new table 'samples' will link each sample against an 'experiment',
+an 'individual' and which in turn can link to a 'strain'.
+
+Experiment is here in a wide sense, GTex can be one - I don't want to
+use dataset ;)
+
+This means a (slight) reordering:
+
+phenotypes: (id), sample_id, value
+samples: experiment_id, individual_id
+experiments: name, version
+individual: strain_id
+strains: species_id
+species: ...
+
+ProbeData is also interesting, because it has the same structure as
+ProbeSetData, but only contains microarrays. This tables should be one
+(when we clear up the cross-referencing) as they both contain
+phenotype values. Both are large tables.
+
+PublishData is another phenotype table with values only which can be
+merged into that same table.
+
+So we have phenotype data in 3 tables with exactly the same
+layout. There is also TissueProbeSet*, but we'll ignore those for
+now. I think we should merge these into one and have the sample ref
+refer to the type of data (probeset, probe, metabolomics,
+whatever). These are all phenotype values and by having them split
+into different tables they won't play well when looking for
+correlations.
+
+ProbeSet contains the metadata on the probes and should (eventually)
+move into NoSQL. There is plenty redundancy in that table now.
+
+I know it is going to be a pain to reorganize the database, but if we
+want to use it in the long run we are going to have to simplify it.
+
+
+
+** Publication and publishdata (all pheno)
+
+Phenotype pubs
+
+** QuickSearch
+
+No longer used
+
+** role
+
+empty
+
+** Sample*
+
+No longer used
+
+** Species & Strain (should be sample)
+
+Menu
+
+** InbredSet
+
+Menu
+
+** TableComments
+
+Metadata on DB
+
+** Temp*
+
+User upload data
+
+** Tissue
+
+Menu - 3rd level
+
+** TissueP*
+
+Correlation tables
+
+** User collection
+
+User selection - retained
+
+** UserPrivilege
+
+** Vlookup
+
diff --git a/doc/new_variable_names.txt b/doc/new_variable_names.txt
deleted file mode 100644
index c11c160e..00000000
--- a/doc/new_variable_names.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-RISet/riset -> group
-webqtlDataset.py -> data_set.py
-webqtlDataset (class object) -> DataSet
-database/db -> dataset/data_set
-DataEditingPage -> show_trait.py/show_trait.html
-webqtlTrait -> GeneralTrait \ No newline at end of file
diff --git a/doc/notes_DA.txt b/doc/notes_DA.txt
deleted file mode 100644
index 410e0182..00000000
--- a/doc/notes_DA.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Danny's notes about the genenetwork source
-
-Location of static files:
-
-Location of HTML templates: wqflask/wqflask/templates/
-
-Entry point of the wqflask app: wqflask/wqflask/__init__.py
-
-Application routes: wqflask/wqflask/views.py
-
diff --git a/doc/gn_installation_notes.txt b/doc/old/gn_installation_notes.txt
index 584080f7..efea0309 100644
--- a/doc/gn_installation_notes.txt
+++ b/doc/old/gn_installation_notes.txt
@@ -96,7 +96,7 @@ Before installing from requirements.txt, install numpy separately:
pip install numpy==1.7.0 (or whatever version we're using)
Install from requirements.txt (after activating virtualenv):
-pip install -r gene/misc/requirements.txt
+pip install -r gene/doc/requirements.txt
===========================================
@@ -343,4 +343,4 @@ python runserver.py
To do full upgrade (as opposed to apt-get upgrade)
sudo aptitude full-upgrade
-=========================================== \ No newline at end of file
+===========================================
diff --git a/doc/notes.txt b/doc/old/notes.txt
index f8ce2759..f8ce2759 100644
--- a/doc/notes.txt
+++ b/doc/old/notes.txt
diff --git a/doc/requirements.txt b/doc/requirements.txt
deleted file mode 100644
index 39ee5652..00000000
--- a/doc/requirements.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-BeautifulSoup==3.2.1
-Flask==0.9
-Flask-Login==0.1.3
-Flask-Mail==0.7.6
-Flask-Principal==0.3.4
-Flask-SQLAlchemy==0.16
-Flask-Security==1.6.0
-Flask-WTF==0.8.3
-Jinja2==2.6
-MySQL-python==1.2.4
-PyYAML==3.10
-#Reaper==1.0
-Reindent==0.1.1
-SQLAlchemy==0.8.0
-WTForms==1.0.3
-Werkzeug==0.8.3
-apache-libcloud==0.12.3
-argparse==1.2.1
-blinker==1.2
-cairosvg==1.0.15
-itsdangerous==0.17
-logging-tree==1.2
-logilab-astng==0.24.3
-logilab-common==0.59.1
-#numarray==1.5.2
-numpy==1.7.0
-passlib==1.6.1
-pp==1.6.3
-pylint==0.27.0
-redis==2.7.2
-requests==1.1.0
-scipy==0.11.0
-simplejson==3.0.7
-wsgiref==0.1.2
-yolk==0.4.3
-XlsxWriter==0.7.2
diff --git a/doc/todo.txt b/doc/todo.txt
deleted file mode 100644
index 1d781b13..00000000
--- a/doc/todo.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-- Ask Rob about potentially recoding qtlreaper
-- Ask Rob about Probe/cellid traits \ No newline at end of file
diff --git a/etc/default_settings.py b/etc/default_settings.py
new file mode 100644
index 00000000..0cf40265
--- /dev/null
+++ b/etc/default_settings.py
@@ -0,0 +1,29 @@
+import os
+import sys
+
+LOGFILE = "/tmp/genenetwork2.log"
+
+# This is needed because Flask turns key errors into a
+# 400 bad request response with no exception/log
+TRAP_BAD_REQUEST_ERRORS = True
+
+DB_URI = "mysql://gn2:mysql_password@localhost/db_webqtl_s"
+SQLALCHEMY_DATABASE_URI = 'mysql://gn2:mysql_password@localhost/db_webqtl_s'
+
+# http://pythonhosted.org/Flask-Security/configuration.html
+SECURITY_CONFIRMABLE = True
+SECURITY_TRACKABLE = True
+SECURITY_REGISTERABLE = True
+SECURITY_RECOVERABLE = True
+SECURITY_EMAIL_SENDER = "no-reply@genenetwork.org"
+SECURITY_POST_LOGIN_VIEW = "/thank_you"
+SQLALCHEMY_POOL_RECYCLE = 3600
+
+SERVER_PORT = 5003
+SECRET_HMAC_CODE = '\x08\xdf\xfa\x93N\x80\xd9\\H@\\\x9f`\x98d^\xb4a;\xc6OM\x946a\xbc\xfc\x80:*\xebc'
+
+# Path overrides for Genenetwork
+GENENETWORK_FILES = os.environ['HOME']+"/gn2_data"
+PYLMM_COMMAND = str.strip(os.popen("which pylmm_redis").read())
+PLINK_COMMAND = str.strip(os.popen("which plink2").read())
+GEMMA_COMMAND = str.strip(os.popen("which gemma").read())
diff --git a/setup.py b/setup.py
index 1b11acc0..a9b71fab 100755
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,17 @@
+# Run setup from python - this script is used by the GNU Guix builder.
+
from setuptools import setup, find_packages
setup(name='genenetwork2',
version='2.0',
+ author = "The GeneNetwork Team",
+ author_email='rwilliams@uthsc.edu',
+ url = "https://github.com/genenetwork/genenetwork2/blob/master/README.md",
+ description = 'Website and tools for genetics.',
include_package_data=True,
- # packages=find_packages()
- packages=['wqflask']
- )
+ packages=['wqflask','etc'],
+ scripts=['bin/genenetwork2'],
+ # package_data = {
+ # 'etc': ['*.py']
+ # }
+)
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index e37a838f..52ac95f0 100755
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -44,6 +44,7 @@ from dbFunction import webqtlDatabaseFunction
from utility import webqtlUtil
from utility.benchmark import Bench
from utility import chunks
+from utility.tools import locate, locate_ignore_error
from maintenance import get_group_samplelists
@@ -57,40 +58,26 @@ DS_NAME_MAP = {}
def create_dataset(dataset_name, dataset_type = None, get_samplelist = True):
if not dataset_type:
dataset_type = Dataset_Getter(dataset_name)
- #dataset_type = get_dataset_type_from_json(dataset_name)
print("dataset_type is:", dataset_type)
- #query = """
- # SELECT DBType.Name
- # FROM DBList, DBType
- # WHERE DBList.Name = '{}' and
- # DBType.Id = DBList.DBTypeId
- # """.format(escape(dataset_name))
- #dataset_type = g.db.execute(query).fetchone().Name
-
dataset_ob = DS_NAME_MAP[dataset_type]
dataset_class = globals()[dataset_ob]
return dataset_class(dataset_name, get_samplelist)
-
-#def get_dataset_type_from_json(dataset_name):
-
class Dataset_Types(object):
-
+
def __init__(self):
self.datasets = {}
file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
with open(file_name, 'r') as fh:
data = json.load(fh)
-
+
print("*" * 70)
for species in data['datasets']:
for group in data['datasets'][species]:
for dataset_type in data['datasets'][species][group]:
for dataset in data['datasets'][species][group][dataset_type]:
- #print("dataset is:", dataset)
-
short_dataset_name = dataset[1]
if dataset_type == "Phenotypes":
new_type = "Publish"
@@ -99,32 +86,28 @@ class Dataset_Types(object):
else:
new_type = "ProbeSet"
self.datasets[short_dataset_name] = new_type
-
+
def __call__(self, name):
return self.datasets[name]
-
+
# Do the intensive work at startup one time only
Dataset_Getter = Dataset_Types()
-#
-#print("Running at startup:", get_dataset_type_from_json("HBTRC-MLPFC_0611"))
-
-
def create_datasets_list():
key = "all_datasets"
result = Redis.get(key)
-
+
if result:
print("Cache hit!!!")
datasets = pickle.loads(result)
-
+
else:
datasets = list()
with Bench("Creating DataSets object"):
type_dict = {'Publish': 'PublishFreeze',
'ProbeSet': 'ProbeSetFreeze',
'Geno': 'GenoFreeze'}
-
+
for dataset_type in type_dict:
query = "SELECT Name FROM {}".format(type_dict[dataset_type])
for result in g.db.execute(query).fetchall():
@@ -133,10 +116,10 @@ def create_datasets_list():
#print("type: {}\tname: {}".format(dataset_type, result.Name))
dataset = create_dataset(result.Name, dataset_type)
datasets.append(dataset)
-
+
Redis.set(key, pickle.dumps(datasets, pickle.HIGHEST_PROTOCOL))
Redis.expire(key, 60*60)
-
+
return datasets
@@ -157,30 +140,30 @@ def mescape(*items):
class Markers(object):
"""Todo: Build in cacheing so it saves us reading the same file more than once"""
def __init__(self, name):
- json_data_fh = open(os.path.join(webqtlConfig.NEWGENODIR + name + '.json'))
+ json_data_fh = open(locate(name + '.json','genotype/json'))
try:
markers = json.load(json_data_fh)
except:
markers = []
-
+
for marker in markers:
if (marker['chr'] != "X") and (marker['chr'] != "Y"):
marker['chr'] = int(marker['chr'])
marker['Mb'] = float(marker['Mb'])
-
+
self.markers = markers
#print("self.markers:", self.markers)
-
-
+
+
def add_pvalues(self, p_values):
print("length of self.markers:", len(self.markers))
print("length of p_values:", len(p_values))
-
+
if type(p_values) is list:
# THIS IS only needed for the case when we are limiting the number of p-values calculated
#if len(self.markers) > len(p_values):
# self.markers = self.markers[:len(p_values)]
-
+
for marker, p_value in itertools.izip(self.markers, p_values):
if not p_value:
continue
@@ -213,18 +196,11 @@ class Markers(object):
#self.markers.remove(marker)
#del self.markers[i]
self.markers = filtered_markers
-
-
- #for i, marker in enumerate(self.markers):
- # if not 'p_value' in marker:
- # #print("self.markers[i]", self.markers[i])
- # del self.markers[i]
- # #self.markers.remove(self.markers[i])
class HumanMarkers(Markers):
-
+
def __init__(self, name, specified_markers = []):
- marker_data_fh = open(os.path.join(webqtlConfig.PYLMM_PATH + name + '.bim'))
+ marker_data_fh = open(locate('genotype') + '/' + name + '.bim')
self.markers = []
for line in marker_data_fh:
splat = line.strip().split()
@@ -243,39 +219,21 @@ class HumanMarkers(Markers):
marker['name'] = splat[1]
marker['Mb'] = float(splat[3]) / 1000000
self.markers.append(marker)
-
+
#print("markers is: ", pf(self.markers))
def add_pvalues(self, p_values):
- #for marker, p_value in itertools.izip(self.markers, p_values):
- # if marker['Mb'] <= 0 and marker['chr'] == 0:
- # continue
- # marker['p_value'] = p_value
- # print("p_value is:", marker['p_value'])
- # marker['lod_score'] = -math.log10(marker['p_value'])
- # #Using -log(p) for the LRS; need to ask Rob how he wants to get LRS from p-values
- # marker['lrs_value'] = -math.log10(marker['p_value']) * 4.61
-
- #print("p_values2:", pf(p_values))
super(HumanMarkers, self).add_pvalues(p_values)
-
- #with Bench("deleting markers"):
- # markers = []
- # for marker in self.markers:
- # if not marker['Mb'] <= 0 and not marker['chr'] == 0:
- # markers.append(marker)
- # self.markers = markers
-
-
+
class DatasetGroup(object):
"""
Each group has multiple datasets; each species has multiple groups.
-
+
For example, Mouse has multiple groups (BXD, BXA, etc), and each group
has multiple datasets associated with it.
-
+
"""
def __init__(self, dataset):
"""This sets self.group and self.group_id"""
@@ -283,14 +241,14 @@ class DatasetGroup(object):
self.name, self.id = g.db.execute(dataset.query_for_group).fetchone()
if self.name == 'BXD300':
self.name = "BXD"
-
+
self.f1list = None
self.parlist = None
self.get_f1_parent_strains()
#print("parents/f1s: {}:{}".format(self.parlist, self.f1list))
-
+
self.species = webqtlDatabaseFunction.retrieve_species(self.name)
-
+
self.incparentsf1 = False
self.allsamples = None
self._datasets = None
@@ -301,7 +259,7 @@ class DatasetGroup(object):
def get_markers(self):
#print("self.species is:", self.species)
if self.species == "human":
- marker_class = HumanMarkers
+ marker_class = HumanMarkers
else:
marker_class = Markers
@@ -310,12 +268,6 @@ class DatasetGroup(object):
def datasets(self):
key = "group_dataset_menu:v2:" + self.name
print("key is2:", key)
- #with Bench("Loading cache"):
- # result = Redis.get(key)
- #if result:
- # self._datasets = pickle.loads(result)
- # return self._datasets
-
dataset_menu = []
print("[tape4] webqtlConfig.PUBLICTHRESH:", webqtlConfig.PUBLICTHRESH)
print("[tape4] type webqtlConfig.PUBLICTHRESH:", type(webqtlConfig.PUBLICTHRESH))
@@ -355,7 +307,7 @@ class DatasetGroup(object):
dataset_menu.append(dict(tissue=None, datasets=[(dataset, dataset_short)]))
else:
dataset_sub_menu = [item[1:] for item in dataset]
-
+
tissue_already_exists = False
tissue_position = None
for i, tissue_dict in enumerate(dataset_menu):
@@ -383,7 +335,7 @@ class DatasetGroup(object):
f1, f12, maternal, paternal = webqtlUtil.ParInfo[self.name]
except KeyError:
f1 = f12 = maternal = paternal = None
-
+
if f1 and f12:
self.f1list = [f1, f12]
if maternal and paternal:
@@ -402,21 +354,17 @@ class DatasetGroup(object):
#print(" type: ", type(self.samplelist))
#print(" self.samplelist: ", self.samplelist)
else:
- #print("Cache not hit")
-
- from utility.tools import plink_command
- PLINK_PATH,PLINK_COMMAND = plink_command()
-
- geno_file_path = webqtlConfig.GENODIR+self.name+".geno"
- plink_file_path = PLINK_PATH+"/"+self.name+".fam"
-
- if os.path.isfile(plink_file_path):
- self.samplelist = get_group_samplelists.get_samplelist("plink", plink_file_path)
- elif os.path.isfile(geno_file_path):
- self.samplelist = get_group_samplelists.get_samplelist("geno", geno_file_path)
+ print("Cache not hit")
+
+ genotype_fn = locate_ignore_error(self.name+".geno",'genotype')
+ mapping_fn = locate_ignore_error(self.name+".fam",'mapping')
+ if mapping_fn:
+ self.samplelist = get_group_samplelists.get_samplelist("plink", mapping_fn)
+ elif genotype_fn:
+ self.samplelist = get_group_samplelists.get_samplelist("geno", genotype_fn)
else:
self.samplelist = None
- #print("after get_samplelist")
+ print("Sample list: ",self.samplelist)
Redis.set(key, json.dumps(self.samplelist))
Redis.expire(key, 60*5)
@@ -428,30 +376,14 @@ class DatasetGroup(object):
def read_genotype_file(self):
'''Read genotype from .geno file instead of database'''
- #if self.group == 'BXD300':
- # self.group = 'BXD'
- #
- #assert self.group, "self.group needs to be set"
-
#genotype_1 is Dataset Object without parents and f1
#genotype_2 is Dataset Object with parents and f1 (not for intercross)
genotype_1 = reaper.Dataset()
# reaper barfs on unicode filenames, so here we ensure it's a string
- full_filename = str(os.path.join(webqtlConfig.GENODIR, self.name + '.geno'))
- if os.path.isfile(full_filename):
- #print("Reading file: ", full_filename)
- genotype_1.read(full_filename)
- #print("File read")
- else:
- try:
- full_filename = str(os.path.join(webqtlConfig.TMPDIR, self.name + '.geno'))
- #print("Reading file")
- genotype_1.read(full_filename)
- #print("File read")
- except IOError:
- print("File doesn't exist!")
+ full_filename = str(locate(self.name+'.geno','genotype'))
+ genotype_1.read(full_filename)
if genotype_1.type == "group" and self.parlist:
genotype_2 = genotype_1.add(Mat=self.parlist[0], Pat=self.parlist[1]) #, F1=_f1)
@@ -460,39 +392,15 @@ class DatasetGroup(object):
#determine default genotype object
if self.incparentsf1 and genotype_1.type != "intercross":
- #self.genotype = genotype_2
genotype = genotype_2
else:
self.incparentsf1 = 0
- #self.genotype = genotype_1
genotype = genotype_1
- #self.samplelist = list(self.genotype.prgy)
self.samplelist = list(genotype.prgy)
-
- return genotype
-
-
-#class DataSets(object):
-# """Builds a list of DataSets"""
-#
-# def __init__(self):
-# self.datasets = list()
-#
-
- #query = """SELECT Name FROM ProbeSetFreeze
- # UNION
- # SELECT Name From PublishFreeze
- # UNION
- # SELECT Name From GenoFreeze"""
- #
- #for result in g.db.execute(query).fetchall():
- # dataset = DataSet(result.Name)
- # self.datasets.append(dataset)
+ return genotype
-#ds = DataSets()
-#print("[orange] ds:", ds.datasets)
class DataSet(object):
"""
@@ -515,7 +423,7 @@ class DataSet(object):
self.check_confidentiality()
self.retrieve_other_names()
-
+
self.group = DatasetGroup(self) # sets self.group and self.group_id and gets genotype
if get_samplelist == True:
self.group.get_samplelist()
@@ -525,32 +433,11 @@ class DataSet(object):
def get_desc(self):
"""Gets overridden later, at least for Temp...used by trait's get_given_name"""
return None
-
- #@staticmethod
- #def get_by_trait_id(trait_id):
- # """Gets the dataset object given the trait id"""
- #
- #
- #
- # name = g.db.execute(""" SELECT
- #
- # """)
- #
- # return DataSet(name)
# Delete this eventually
@property
def riset():
Weve_Renamed_This_As_Group
-
-
- #@property
- #def group(self):
- # if not self._group:
- # self.get_group()
- #
- # return self._group
-
def retrieve_other_names(self):
"""
@@ -560,7 +447,7 @@ class DataSet(object):
This is not meant to retrieve the data set info if no name at all is passed.
"""
-
+
try:
if self.type == "ProbeSet":
query_args = tuple(escape(x) for x in (
@@ -596,17 +483,17 @@ class DataSet(object):
except TypeError:
print("Dataset {} is not yet available in GeneNetwork.".format(self.name))
pass
-
+
def get_trait_data(self, sample_list=None):
if sample_list:
self.samplelist = sample_list
else:
self.samplelist = self.group.samplelist
-
+
if self.group.parlist != None and self.group.f1list != None:
if (self.group.parlist + self.group.f1list) in self.samplelist:
self.samplelist += self.group.parlist + self.group.f1list
-
+
query = """
SELECT Strain.Name, Strain.Id FROM Strain, Species
WHERE Strain.Name IN {}
@@ -623,21 +510,6 @@ class DataSet(object):
number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
trait_sample_data = []
for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
-
- #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
- #tempTable = None
- #if GeneId and db.type == "ProbeSet":
- # if method == "3":
- # tempTable = self.getTempLiteratureTable(species=species,
- # input_species_geneid=GeneId,
- # returnNumber=returnNumber)
- #
- # if method == "4" or method == "5":
- # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
- # TissueProbeSetFreezeId=tissueProbeSetFreezeId,
- # method=method,
- # returnNumber=returnNumber)
-
if self.type == "Publish":
dataset_type = "Phenotype"
else:
@@ -658,7 +530,7 @@ class DataSet(object):
left join {}Data as T{} on T{}.Id = {}XRef.DataId
and T{}.StrainId={}\n
""".format(*mescape(self.type, item, item, self.type, item, item))
-
+
if self.type == "Publish":
query += """
WHERE {}XRef.InbredSetId = {}Freeze.InbredSetId
@@ -675,16 +547,16 @@ class DataSet(object):
order by {}.Id
""".format(*mescape(self.type, self.type, self.type, self.type,
self.name, dataset_type, self.type, self.type, dataset_type))
-
+
#print("trait data query: ", query)
-
+
results = g.db.execute(query).fetchall()
#print("query results:", results)
trait_sample_data.append(results)
trait_count = len(trait_sample_data[0])
self.trait_data = collections.defaultdict(list)
-
+
# put all of the separate data together into a dictionary where the keys are
# trait names and values are lists of sample values
for trait_counter in range(trait_count):
@@ -697,9 +569,9 @@ class PhenotypeDataSet(DataSet):
DS_NAME_MAP['Publish'] = 'PhenotypeDataSet'
def setup(self):
-
+
#print("IS A PHENOTYPEDATASET")
-
+
# Fields in the database table
self.search_fields = ['Phenotype.Post_publication_description',
'Phenotype.Pre_publication_description',
@@ -770,26 +642,26 @@ class PhenotypeDataSet(DataSet):
def get_trait_info(self, trait_list, species = ''):
for this_trait in trait_list:
-
+
if not this_trait.haveinfo:
this_trait.retrieve_info(get_qtl_info=True)
description = this_trait.post_publication_description
-
+
#If the dataset is confidential and the user has access to confidential
#phenotype traits, then display the pre-publication description instead
#of the post-publication description
if this_trait.confidential:
this_trait.description_display = ""
continue # for now
-
+
if not webqtlUtil.hasAccessToConfidentialPhenotypeTrait(
privilege=self.privilege,
userName=self.userName,
authorized_users=this_trait.authorized_users):
-
+
description = this_trait.pre_publication_description
-
+
if len(description) > 0:
this_trait.description_display = description.strip()
else:
@@ -834,7 +706,7 @@ class PhenotypeDataSet(DataSet):
this_trait.LRS_score_repr = LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = LRS_score_value = this_trait.lrs
this_trait.LRS_location_repr = LRS_location_repr = 'Chr%s: %.6f' % (LRS_Chr, float(LRS_Mb))
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -892,7 +764,7 @@ class GenotypeDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
-
+
def get_trait_list(self):
query = """
select Geno.Name
@@ -926,7 +798,7 @@ class GenotypeDataSet(DataSet):
this_trait.location_repr = 'Chr%s: %.6f' % (this_trait.chr, float(this_trait.mb) )
this_trait.location_value = trait_location_value
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -1018,7 +890,7 @@ class MrnaAssayDataSet(DataSet):
def check_confidentiality(self):
return geno_mrna_confidentiality(self)
-
+
def get_trait_list_1(self):
query = """
select ProbeSet.Name
@@ -1027,86 +899,14 @@ class MrnaAssayDataSet(DataSet):
and ProbeSetFreezeId = {}
""".format(escape(str(self.id)))
results = g.db.execute(query).fetchall()
- #print("After get_trait_list query")
trait_data = {}
for trait in results:
- #print("Retrieving sample_data for ", trait[0])
trait_data[trait[0]] = self.retrieve_sample_data(trait[0])
- #print("After retrieve_sample_data")
return trait_data
-
- #def get_trait_data(self):
- # self.samplelist = self.group.samplelist + self.group.parlist + self.group.f1list
- # query = """
- # SELECT Strain.Name, Strain.Id FROM Strain, Species
- # WHERE Strain.Name IN {}
- # and Strain.SpeciesId=Species.Id
- # and Species.name = '{}'
- # """.format(create_in_clause(self.samplelist), *mescape(self.group.species))
- # results = dict(g.db.execute(query).fetchall())
- # sample_ids = [results[item] for item in self.samplelist]
- #
- # # MySQL limits the number of tables that can be used in a join to 61,
- # # so we break the sample ids into smaller chunks
- # # Postgres doesn't have that limit, so we can get rid of this after we transition
- # chunk_size = 50
- # number_chunks = int(math.ceil(len(sample_ids) / chunk_size))
- # trait_sample_data = []
- # for sample_ids_step in chunks.divide_into_chunks(sample_ids, number_chunks):
- #
- # #XZ, 09/24/2008: build one temporary table that only contains the records associated with the input GeneId
- # #tempTable = None
- # #if GeneId and db.type == "ProbeSet":
- # # if method == "3":
- # # tempTable = self.getTempLiteratureTable(species=species,
- # # input_species_geneid=GeneId,
- # # returnNumber=returnNumber)
- # #
- # # if method == "4" or method == "5":
- # # tempTable = self.getTempTissueCorrTable(primaryTraitSymbol=GeneSymbol,
- # # TissueProbeSetFreezeId=tissueProbeSetFreezeId,
- # # method=method,
- # # returnNumber=returnNumber)
- #
- # temp = ['T%s.value' % item for item in sample_ids_step]
- # query = "SELECT {}.Name,".format(escape(self.type))
- # data_start_pos = 1
- # query += string.join(temp, ', ')
- # query += ' FROM ({}, {}XRef, {}Freeze) '.format(*mescape(self.type,
- # self.type,
- # self.type))
- #
- # for item in sample_ids_step:
- # query += """
- # left join {}Data as T{} on T{}.Id = {}XRef.DataId
- # and T{}.StrainId={}\n
- # """.format(*mescape(self.type, item, item, self.type, item, item))
- #
- # query += """
- # WHERE {}XRef.{}FreezeId = {}Freeze.Id
- # and {}Freeze.Name = '{}'
- # and {}.Id = {}XRef.{}Id
- # order by {}.Id
- # """.format(*mescape(self.type, self.type, self.type, self.type,
- # self.name, self.type, self.type, self.type, self.type))
- # results = g.db.execute(query).fetchall()
- # trait_sample_data.append(results)
- #
- # trait_count = len(trait_sample_data[0])
- # self.trait_data = collections.defaultdict(list)
- #
- # # put all of the separate data together into a dictionary where the keys are
- # # trait names and values are lists of sample values
- # for trait_counter in range(trait_count):
- # trait_name = trait_sample_data[0][trait_counter][0]
- # for chunk_counter in range(int(number_chunks)):
- # self.trait_data[trait_name] += (
- # trait_sample_data[chunk_counter][trait_counter][data_start_pos:])
-
def get_trait_info(self, trait_list=None, species=''):
- # Note: setting trait_list to [] is probably not a great idea.
+ # Note: setting trait_list to [] is probably not a great idea.
if not trait_list:
trait_list = []
@@ -1169,7 +969,7 @@ class MrnaAssayDataSet(DataSet):
#print("query is:", pf(query))
result = g.db.execute(query).fetchone()
-
+
mean = result[0] if result else 0
if mean:
@@ -1190,28 +990,15 @@ class MrnaAssayDataSet(DataSet):
Geno.SpeciesId = Species.Id
""".format(species, this_trait.locus)
result = g.db.execute(query).fetchone()
-
+
if result:
- #if result[0] and result[1]:
- # lrs_chr = result[0]
- # lrs_mb = result[1]
lrs_chr, lrs_mb = result
#XZ: LRS_location_value is used for sorting
lrs_location_value = self.convert_location_to_value(lrs_chr, lrs_mb)
-
- #try:
- # lrs_location_value = int(lrs_chr)*1000 + float(lrs_mb)
- #except:
- # if lrs_chr.upper() == 'X':
- # lrs_location_value = 20*1000 + float(lrs_mb)
- # else:
- # lrs_location_value = (ord(str(LRS_chr).upper()[0])*1000 +
- # float(lrs_mb))
-
this_trait.LRS_score_repr = '%3.1f' % this_trait.lrs
this_trait.LRS_score_value = this_trait.lrs
this_trait.LRS_location_repr = 'Chr%s: %.6f' % (lrs_chr, float(lrs_mb))
-
+
def convert_location_to_value(self, chromosome, mb):
try:
@@ -1222,7 +1009,7 @@ class MrnaAssayDataSet(DataSet):
else:
location_value = (ord(str(chromosome).upper()[0])*1000 +
float(mb))
-
+
return location_value
def get_sequence(self):
@@ -1239,7 +1026,7 @@ class MrnaAssayDataSet(DataSet):
""" % (escape(self.name), escape(self.dataset.name))
results = g.db.execute(query).fetchone()
return results[0]
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -1260,8 +1047,8 @@ class MrnaAssayDataSet(DataSet):
results = g.db.execute(query).fetchall()
#print("RETRIEVED RESULTS HERE:", results)
return results
-
-
+
+
def retrieve_genes(self, column_name):
query = """
select ProbeSet.Name, ProbeSet.%s
@@ -1270,37 +1057,8 @@ class MrnaAssayDataSet(DataSet):
ProbeSetXRef.ProbeSetId=ProbeSet.Id;
""" % (column_name, escape(str(self.id)))
results = g.db.execute(query).fetchall()
-
- return dict(results)
- #def retrieve_gene_symbols(self):
- # query = """
- # select ProbeSet.Name, ProbeSet.Symbol, ProbeSet.GeneId
- # from ProbeSet,ProbeSetXRef
- # where ProbeSetXRef.ProbeSetFreezeId = %s and
- # ProbeSetXRef.ProbeSetId=ProbeSet.Id;
- # """ % (self.id)
- # results = g.db.execute(query).fetchall()
- # symbol_dict = {}
- # for item in results:
- # symbol_dict[item[0]] = item[1]
- # return symbol_dict
- #
- #def retrieve_gene_ids(self):
- # query = """
- # select ProbeSet.Name, ProbeSet.GeneId
- # from ProbeSet,ProbeSetXRef
- # where ProbeSetXRef.ProbeSetFreezeId = %s and
- # ProbeSetXRef.ProbeSetId=ProbeSet.Id;
- # """ % (self.id)
- # return process_and_run_query(query)
- # results = g.db.execute(query).fetchall()
- # symbol_dict = {}
- # for item in results:
- # symbol_dict[item[0]] = item[1]
- # return symbol_dict
-
-
+ return dict(results)
class TempDataSet(DataSet):
@@ -1322,8 +1080,8 @@ class TempDataSet(DataSet):
self.id = 1
self.fullname = 'Temporary Storage'
self.shortname = 'Temp'
-
-
+
+
@staticmethod
def handle_pca(desc):
if 'PCA' in desc:
@@ -1332,13 +1090,13 @@ class TempDataSet(DataSet):
else:
desc = desc[:desc.index('entered')].strip()
return desc
-
+
def get_desc(self):
g.db.execute('SELECT description FROM Temp WHERE Name=%s', self.name)
desc = g.db.fetchone()[0]
desc = self.handle_pca(desc)
- return desc
-
+ return desc
+
def get_group(self):
self.cursor.execute("""
SELECT
@@ -1351,7 +1109,7 @@ class TempDataSet(DataSet):
""", self.name)
self.group, self.group_id = self.cursor.fetchone()
#return self.group
-
+
def retrieve_sample_data(self, trait):
query = """
SELECT
@@ -1365,7 +1123,7 @@ class TempDataSet(DataSet):
Order BY
Strain.Name
""" % escape(trait.name)
-
+
results = g.db.execute(query).fetchall()
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
new file mode 100755
index 00000000..cdce119a
--- /dev/null
+++ b/wqflask/base/webqtlConfig.py
@@ -0,0 +1,84 @@
+#########################################'
+# Environment Variables - public
+#
+# Note: much of this needs to handled by the settings/environment
+# scripts. But rather than migrating everything in one go, we'll
+# take it a step at a time. First the hard coded paths get replaced
+# with those in utility/tools.py
+#
+#########################################
+
+from utility.tools import mk_dir, assert_dir, flat_files, TEMPDIR
+
+#Debug Level
+#1 for debug, mod python will reload import each time
+DEBUG = 1
+
+#USER privilege
+USERDICT = {'guest':1,'user':2, 'admin':3, 'root':4}
+
+#minimum number of informative strains
+KMININFORMATIVE = 5
+
+#maximum number of traits for interval mapping
+MULTIPLEMAPPINGLIMIT = 11
+
+#maximum number of traits for correlation
+MAXCORR = 100
+
+#Daily download limit from one IP
+DAILYMAXIMUM = 1000
+
+#maximum LRS value
+MAXLRS = 460.0
+
+#temporary data life span
+MAXLIFE = 86400
+
+#MINIMUM Database public value
+PUBLICTHRESH = 0
+
+#NBCI address
+NCBI_LOCUSID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene&cmd=Retrieve&dopt=Graphics&list_uids=%s"
+UCSC_REFSEQ = "http://genome.cse.ucsc.edu/cgi-bin/hgGene?db=%s&hgg_gene=%s&hgg_chrom=chr%s&hgg_start=%s&hgg_end=%s"
+GENBANK_ID = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=Nucleotide&cmd=search&doptcmdl=DocSum&term=%s"
+OMIM_ID = "http://www.ncbi.nlm.nih.gov/omim/%s"
+UNIGEN_ID = "http://www.ncbi.nlm.nih.gov/UniGene/clust.cgi?ORG=%s&CID=%s";
+HOMOLOGENE_ID = "http://www.ncbi.nlm.nih.gov/sites/entrez?Db=homologene&Cmd=DetailsSearch&Term=%s"
+PUBMEDLINK_URL = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&list_uids=%s&dopt=Abstract"
+UCSC_POS = "http://genome.ucsc.edu/cgi-bin/hgTracks?clade=mammal&org=%s&db=%s&position=chr%s:%s-%s&pix=800&Submit=submit"
+UCSC_BLAT = 'http://genome.ucsc.edu/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
+UTHSC_BLAT = 'http://ucscbrowser.genenetwork.org/cgi-bin/hgBlat?org=%s&db=%s&type=0&sort=0&output=0&userSeq=%s'
+UCSC_GENOME = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=http://web2qtl.utmem.edu:88/snp/chr%s"
+ENSEMBLE_BLAT = 'http://www.ensembl.org/Mus_musculus/featureview?type=AffyProbe&id=%s'
+DBSNP = 'http://www.ncbi.nlm.nih.gov/SNP/snp_ref.cgi?type=rs&rs=%s'
+UCSC_RUDI_TRACK_URL = " http://genome.cse.ucsc.edu/cgi-bin/hgTracks?org=%s&db=%s&hgt.customText=http://gbic.biol.rug.nl/~ralberts/tracks/%s/%s"
+GENOMEBROWSER_URL="http://ucscbrowser.genenetwork.org/cgi-bin/hgTracks?clade=mammal&org=Mouse&db=mm9&position=%s&hgt.suggest=&pix=800&Submit=submit"
+ENSEMBLETRANSCRIPT_URL="http://useast.ensembl.org/Mus_musculus/Lucene/Details?species=Mus_musculus;idx=Transcript;end=1;q=%s"
+
+# HTMLPATH = GNROOT + 'genotype_files/'
+# PYLMM_PATH
+# IMGDIR = GNROOT + '/wqflask/wqflask/static/output/'
+
+# Temporary storage:
+TMPDIR = mk_dir(TEMPDIR+'/gn2/')
+CACHEDIR = mk_dir(TEMPDIR+'/cache/')
+# We can no longer write into the git tree:
+GENERATED_IMAGE_DIR = mk_dir(TMPDIR+'/generate/')
+GENERATED_TEXT_DIR = mk_dir(TMPDIR+'/generate_text/')
+
+# Flat file directories
+GENODIR = flat_files('genotype')+'/'
+JSON_GENODIR = assert_dir(GENODIR+'json/')
+
+# SITENAME = 'GN'
+# PORTADDR = "http://50.16.251.170"
+# BASEHREF = '<base href="http://50.16.251.170/">'
+
+INFOPAGEHREF = '/dbdoc/%s.html'
+CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
+SCRIPTFILE = 'main.py'
+
+# GLOSSARYFILE = "/glossary.html"
+# REFRESHSTR = '<meta http-equiv="refresh" content="5;url=%s' + SCRIPTFILE +'?sid=%s">'
+# REFRESHDIR = '%s' + SCRIPTFILE +'?sid=%s'
diff --git a/wqflask/base/webqtlFormData.py b/wqflask/base/webqtlFormData.py
index 44fdcc3f..10251756 100755
--- a/wqflask/base/webqtlFormData.py
+++ b/wqflask/base/webqtlFormData.py
@@ -157,7 +157,7 @@ class webqtlFormData(object):
self.genotype_1 = reaper.Dataset()
- full_filename = os.path.join(webqtlConfig.GENODIR, self.group + '.geno')
+ full_filename = locate(self.group + '.geno','genotype')
# reaper barfs on unicode filenames, so here we ensure it's a string
full_filename = str(full_filename)
diff --git a/wqflask/basicStatistics/BasicStatisticsFunctions.py b/wqflask/basicStatistics/BasicStatisticsFunctions.py
index 74784853..e748a822 100755
--- a/wqflask/basicStatistics/BasicStatisticsFunctions.py
+++ b/wqflask/basicStatistics/BasicStatisticsFunctions.py
@@ -118,7 +118,7 @@ def plotNormalProbability(vals=None, RISet='', title=None, showstrains=0, specia
Plot.plotXY(c, dataZ, dataX, dataLabel = dataLabel, XLabel='Expected Z score', connectdot=0, YLabel='Trait value', title=title, specialCases=specialStrains, showLabel = showLabel)
filename= webqtlUtil.genRandStr("nP_")
- c.save(webqtlConfig.IMGDIR+filename, format='gif')
+ c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
img=HT.Image('/image/'+filename+'.gif',border=0)
@@ -145,7 +145,7 @@ def plotBoxPlot(vals):
Plot.plotBoxPlot(canvas, XXX, offset=(xLeftOffset, xRightOffset, yTopOffset, yBottomOffset), XLabel= "Trait")
filename= webqtlUtil.genRandStr("Box_")
- canvas.save(webqtlConfig.IMGDIR+filename, format='gif')
+ canvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
img=HT.Image('/image/'+filename+'.gif',border=0)
plotLink = HT.Span("More about ", HT.Href(text="Box Plots", url="http://davidmlane.com/hyperstat/A37797.html", target="_blank", Class="fs13"))
@@ -201,7 +201,7 @@ def plotBarGraph(identification='', RISet='', vals=None, type="name"):
Plot.plotBarText(c, tvals, tnames, variance=tvars, YLabel='Value', title=title, sLabel = sLabel, barSpace = sw)
filename= webqtlUtil.genRandStr("Bar_")
- c.save(webqtlConfig.IMGDIR+filename, format='gif')
+ c.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
img=HT.Image('/image/'+filename+'.gif',border=0)
return img
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index e080050e..489d291f 100755
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -36,8 +36,7 @@ from __future__ import print_function, division
#print("cdict is:", cdict)
import sys
-sys.path.append("/home/zas1024/")
-import zach_settings
+import zach_settings # no hard code paths!
import MySQLdb
diff --git a/wqflask/maintenance/get_group_samplelists.py b/wqflask/maintenance/get_group_samplelists.py
index b8397b47..a9059fad 100755
--- a/wqflask/maintenance/get_group_samplelists.py
+++ b/wqflask/maintenance/get_group_samplelists.py
@@ -6,7 +6,6 @@ import gzip
from base import webqtlConfig
-
def process_genofiles(geno_dir=webqtlConfig.GENODIR):
print("Yabba")
#sys.exit("Dabba")
@@ -54,4 +53,4 @@ def get_samplelist_from_plink(genofilename):
line = line.split(" ")
samplelist.append(line[0])
- return samplelist \ No newline at end of file
+ return samplelist
diff --git a/wqflask/runserver.py b/wqflask/runserver.py
index 59ebf0d4..e4392b3f 100755
--- a/wqflask/runserver.py
+++ b/wqflask/runserver.py
@@ -25,8 +25,8 @@ file_handler = logging.FileHandler(app.config['LOGFILE'])
file_handler.setLevel(logging.DEBUG)
app.logger.addHandler(file_handler)
-import logging_tree
-logging_tree.printout()
+# import logging_tree
+# logging_tree.printout()
app.run(host='0.0.0.0',
port=app.config['SERVER_PORT'],
diff --git a/wqflask/utility/external.py b/wqflask/utility/external.py
new file mode 100644
index 00000000..50afea08
--- /dev/null
+++ b/wqflask/utility/external.py
@@ -0,0 +1,9 @@
+# Call external program
+
+import os
+import sys
+import subprocess
+
+def shell(command):
+ if subprocess.call(command, shell=True) != 0:
+ raise Exception("ERROR: failed on "+command)
diff --git a/wqflask/utility/genofile_parser.py b/wqflask/utility/genofile_parser.py
new file mode 100644
index 00000000..67b84dc9
--- /dev/null
+++ b/wqflask/utility/genofile_parser.py
@@ -0,0 +1,100 @@
+# CTL analysis for GN2
+# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+
+from __future__ import print_function, division, absolute_import
+import sys
+import os
+import glob
+import traceback
+import gzip
+
+
+import simplejson as json
+
+from pprint import pformat as pf
+
+class Marker(object):
+ def __init__(self):
+ self.name = None
+ self.chr = None
+ self.cM = None
+ self.Mb = None
+ self.genotypes = []
+
+
+class ConvertGenoFile(object):
+
+ def __init__(self, input_file):
+ self.mb_exists = False
+ self.cm_exists = False
+ self.markers = []
+
+ self.latest_row_pos = None
+ self.latest_col_pos = None
+
+ self.latest_row_value = None
+ self.latest_col_value = None
+ self.input_fh = open(input_file)
+ print("!!!!!!!!!!!!!!!!PARSER!!!!!!!!!!!!!!!!!!")
+ self.haplotype_notation = {
+ '@mat': "1",
+ '@pat': "2",
+ '@het': "-999",
+ '@unk': "-999"
+ }
+ self.configurations = {}
+
+ def process_rows(self):
+ for self.latest_row_pos, row in enumerate(self.input_fh):
+ self.latest_row_value = row
+ # Take care of headers
+ if not row.strip():
+ continue
+ if row.startswith('#'):
+ continue
+ if row.startswith('Chr'):
+ if 'Mb' in row.split():
+ self.mb_exists = True
+ if 'cM' in row.split():
+ self.cm_exists = True
+ skip = 2 + self.cm_exists + self.mb_exists
+ self.individuals = row.split()[skip:]
+ continue
+ if row.startswith('@'):
+ key, _separater, value = row.partition(':')
+ key = key.strip()
+ value = value.strip()
+ if key in self.haplotype_notation:
+ self.configurations[value] = self.haplotype_notation[key]
+ continue
+ if not len(self.configurations):
+ raise EmptyConfigurations
+ yield row
+
+ def process_csv(self):
+ for row_count, row in enumerate(self.process_rows()):
+ row_items = row.split("\t")
+
+ this_marker = Marker()
+ this_marker.name = row_items[1]
+ this_marker.chr = row_items[0]
+ if self.cm_exists and self.mb_exists:
+ this_marker.cM = row_items[2]
+ this_marker.Mb = row_items[3]
+ genotypes = row_items[4:]
+ elif self.cm_exists:
+ this_marker.cM = row_items[2]
+ genotypes = row_items[3:]
+ elif self.mb_exists:
+ this_marker.Mb = row_items[2]
+ genotypes = row_items[3:]
+ else:
+ genotypes = row_items[2:]
+ for item_count, genotype in enumerate(genotypes):
+ if genotype.upper().strip() in self.configurations:
+ this_marker.genotypes.append(self.configurations[genotype.upper().strip()])
+ else:
+ print("WARNING:", genotype.upper())
+ this_marker.genotypes.append("NA")
+ self.markers.append(this_marker.__dict__)
+
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index b8a41f60..dd8c4a1e 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -1,84 +1,137 @@
# Tools/paths finder resolves external paths from settings and/or environment
# variables
-#
-# Currently supported:
-#
-# PYLMM_PATH finds the root of the git repository of the pylmm_gn2 tool
import os
import sys
from wqflask import app
-def get_setting(id,default,guess,get_valid_path):
- """
- Resolve a setting from the environment or the global settings in app.config
+def get_setting(command_id,guess=None):
+ """Resolve a setting from the environment or the global settings in
+ app.config, with get_valid_path is a function checking whether the
+ path points to an expected directory and returns the full path to
+ the binary command
+
+ guess = os.environ.get('HOME')+'/pylmm'
+ get_setting('PYLMM_PATH',guess)
+
+ first tries the environment variable in +id+, next gets the Flask
+ app setting for the same +id+ and finally does an educated
+ +guess+.
+
+ In all, the environment overrides the others, next is the flask
+ setting, then the guess. A valid path to the binary command is
+ returned. If none is resolved an exception is thrown.
+
+ Note that we do not use the system path. This is on purpose
+ because it will mess up controlled (reproducible) deployment. The
+ proper way is to either use the GNU Guix defaults as listed in
+ etc/default_settings.py or override them yourself by creating a
+ different settings.py file (or setting the environment).
+
"""
+ def value(command):
+ if command:
+ sys.stderr.write("Found path "+command+"\n")
+ return command
+ else:
+ return None
+
# ---- Check whether environment exists
- path = get_valid_path(os.environ.get(id))
- # ---- Check whether setting exists
- setting = app.config.get(id)
- if not path:
- path = get_valid_path(setting)
- # ---- Check whether default exists
- if not path:
- path = get_valid_path(default)
- # ---- Guess directory
- if not path:
- if not setting:
- setting = guess
- path = get_valid_path(guess)
- if not path:
- raise Exception(id+' '+setting+' path unknown or faulty (update settings.py?). '+id+' should point to the root of the git repository')
-
- return path
-
-def pylmm_command(default=None):
+ sys.stderr.write("Looking for "+command_id+"\n")
+ command = value(os.environ.get(command_id))
+ if not command:
+ # ---- Check whether setting exists in app
+ command = value(app.config.get(command_id))
+ if not command:
+ command = value(guess)
+ if not command:
+ raise Exception(command_id+' path unknown or faulty (update settings.py?). '+command_id+' should point to the path')
+ return command
+
+def valid_bin(bin):
+ if os.path.islink(bin) or valid_file(bin):
+ return bin
+ return None
+
+def valid_file(fn):
+ if os.path.isfile(fn):
+ return fn
+ return None
+
+def valid_path(dir):
+ if os.path.isdir(dir):
+ return dir
+ return None
+
+def pylmm_command(guess=None):
+ return valid_bin(get_setting("PYLMM_COMMAND",guess))
+
+def gemma_command(guess=None):
+ return valid_bin(get_setting("GEMMA_COMMAND",guess))
+
+def plink_command(guess=None):
+ return valid_bin(get_setting("PLINK_COMMAND",guess))
+
+def flat_files(subdir=None):
+ base = get_setting("GENENETWORK_FILES")
+ if subdir:
+ return assert_dir(base+"/"+subdir)
+ return assert_dir(base)
+
+def assert_dir(dir):
+ if not valid_path(dir):
+ raise Exception("ERROR: can not find directory "+dir)
+ return dir
+
+def mk_dir(dir):
+ if not valid_path(dir):
+ os.makedirs(dir)
+ return assert_dir(dir)
+
+def locate(name, subdir=None):
"""
- Return the path to the repository and the python command to call
+ Locate a static flat file in the GENENETWORK_FILES environment.
+
+ This function throws an error when the file is not found.
"""
- def get_valid_path(path):
- """Test for a valid repository"""
- if path:
- sys.stderr.write("Trying PYLMM_PATH in "+path+"\n")
- if path and os.path.isfile(path+'/pylmm_gn2/lmm.py'):
- return path
+ base = get_setting("GENENETWORK_FILES")
+ if subdir:
+ base = base+"/"+subdir
+ if valid_path(base):
+ lookfor = base + "/" + name
+ if valid_file(lookfor):
+ print("Found: file "+lookfor+"\n")
+ return lookfor
else:
- None
+ raise Exception("Can not locate "+lookfor)
+ if subdir: sys.stderr.write(subdir)
+ raise Exception("Can not locate "+name+" in "+base)
- guess = os.environ.get('HOME')+'/pylmm_gn2'
- path = get_setting('PYLMM_PATH',default,guess,get_valid_path)
- pylmm_command = 'python '+path+'/pylmm_gn2/lmm.py'
- return path,pylmm_command
-
-def plink_command(default=None):
+def locate_ignore_error(name, subdir=None):
"""
- Return the path to the repository and the python command to call
+ Locate a static flat file in the GENENETWORK_FILES environment.
+
+ This function does not throw an error when the file is not found
+ but returns None.
"""
- def get_valid_path(path):
- """Test for a valid repository"""
- if path:
- sys.stderr.write("Trying PLINK_PATH in "+path+"\n")
- if path and os.path.isfile(path+'/plink'):
- return path
- else:
- None
-
- guess = os.environ.get('HOME')+'/plink_gemma'
- path = get_setting('PLINK_PATH',default,guess,get_valid_path)
- plink_command = path+'/plink'
- return path,plink_command
-
-def gemma_command(default=None):
- def get_valid_path(path):
- """Test for a valid repository"""
- if path:
- sys.stderr.write("Trying PLINK_PATH in "+path+"\n")
- if path and os.path.isfile(path+'/plink'):
- return path
- else:
- None
+ base = get_setting("GENENETWORK_FILES")
+ if subdir:
+ base = base+"/"+subdir
+ if valid_path(base):
+ lookfor = base + "/" + name
+ if valid_file(lookfor):
+ print("Found: file "+name+"\n")
+ return lookfor
+ sys.stderr.write("WARNING: file "+name+" not found\n")
+ return None
+
+def tempdir():
+ return valid_path(get_setting("TEMPDIR","/tmp"))
- guess = os.environ.get('HOME')+'/plink'
- path = get_setting('PLINK_PATH',default,guess,get_valid_path)
- gemma_command = path+'/gemma'
- return path, gemma_command \ No newline at end of file
+
+# Cached values
+PYLMM_COMMAND = pylmm_command()
+GEMMA_COMMAND = gemma_command()
+PLINK_COMMAND = plink_command()
+FLAT_FILES = flat_files()
+TEMPDIR = tempdir()
diff --git a/wqflask/wqflask/correlation/show_corr_results.py b/wqflask/wqflask/correlation/show_corr_results.py
index 98596ca4..dd661092 100755
--- a/wqflask/wqflask/correlation/show_corr_results.py
+++ b/wqflask/wqflask/correlation/show_corr_results.py
@@ -708,13 +708,11 @@ class CorrelationResults(object):
for sample in sample_names:
if sample not in excluded_samples:
- value = start_vars['value:' + sample]
- if value.strip().lower() == 'x':
- self.sample_data[str(sample)] = None
- else:
- self.sample_data[str(sample)] = float(value)
-
-
+ # print("Looking for",sample,"in",start_vars)
+ value = start_vars.get('value:' + sample)
+ if value:
+ if not value.strip().lower() == 'x':
+ self.sample_data[str(sample)] = float(value)
##XZ, 12/16/2008: the input geneid is of mouse type
#def checkForLitInfo(self,geneId):
@@ -942,7 +940,7 @@ class CorrelationResults(object):
use_tissue_corr = True
DatabaseFileName = self.getFileName( target_db_name=self.target_db_name )
- datasetFile = open(webqtlConfig.TEXTDIR+DatabaseFileName,'r')
+ datasetFile = open(webqtlConfig.CACHEDIR+DatabaseFileName,'r')
#XZ, 01/08/2009: read the first line
line = datasetFile.readline()
diff --git a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
index 6bc0ef77..f74e655d 100755
--- a/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
+++ b/wqflask/wqflask/correlation_matrix/show_corr_matrix.py
@@ -43,7 +43,6 @@ from pprint import pformat as pf
from htmlgen import HTMLgen2 as HT
import reaper
-from base import webqtlConfig
from utility.THCell import THCell
from utility.TDCell import TDCell
from base.trait import GeneralTrait
diff --git a/wqflask/wqflask/ctl/__init__.py b/wqflask/wqflask/ctl/__init__.py
new file mode 100755
index 00000000..e69de29b
--- /dev/null
+++ b/wqflask/wqflask/ctl/__init__.py
diff --git a/wqflask/wqflask/ctl/ctl_analysis.py b/wqflask/wqflask/ctl/ctl_analysis.py
new file mode 100644
index 00000000..d79d9bfe
--- /dev/null
+++ b/wqflask/wqflask/ctl/ctl_analysis.py
@@ -0,0 +1,194 @@
+# CTL analysis for GN2
+# Author / Maintainer: Danny Arends <Danny.Arends@gmail.com>
+import sys
+from numpy import *
+import scipy as sp # SciPy
+import rpy2.robjects as ro # R Objects
+import rpy2.rinterface as ri
+
+from base.webqtlConfig import GENERATED_IMAGE_DIR
+from utility import webqtlUtil # Random number for the image
+from utility import genofile_parser # genofile_parser
+
+import base64
+import array
+import csv
+import itertools
+
+from base import data_set
+from base import trait as TRAIT
+
+from utility import helper_functions
+from utility.tools import locate
+
+from rpy2.robjects.packages import importr
+utils = importr("utils")
+
+## Get pointers to some common R functions
+r_library = ro.r["library"] # Map the library function
+r_options = ro.r["options"] # Map the options function
+r_read_csv = ro.r["read.csv"] # Map the read.csv function
+r_dim = ro.r["dim"] # Map the dim function
+r_c = ro.r["c"] # Map the c function
+r_t = ro.r["t"] # Map the t function
+r_cat = ro.r["cat"] # Map the cat function
+r_paste = ro.r["paste"] # Map the paste function
+r_unlist = ro.r["unlist"] # Map the unlist function
+r_head = ro.r["head"] # Map the unlist function
+r_unique = ro.r["unique"] # Map the unique function
+r_length = ro.r["length"] # Map the length function
+r_unlist = ro.r["unlist"] # Map the unlist function
+r_list = ro.r.list # Map the list function
+r_matrix = ro.r.matrix # Map the matrix function
+r_seq = ro.r["seq"] # Map the seq function
+r_table = ro.r["table"] # Map the table function
+r_names = ro.r["names"] # Map the names function
+r_sink = ro.r["sink"] # Map the sink function
+r_is_NA = ro.r["is.na"] # Map the is.na function
+r_file = ro.r["file"] # Map the file function
+r_png = ro.r["png"] # Map the png function for plotting
+r_dev_off = ro.r["dev.off"] # Map the dev.off function
+r_save_image = ro.r["save.image"] # Map the save.image function
+r_class = ro.r["class"] # Map the class function
+r_save = ro.r["save"] # Map the save function
+r_write_table = ro.r["write.table"] # Map the write.table function
+r_read_table = ro.r["read.table"] # Map the read.table function
+r_as_data_frame = ro.r["as.data.frame"] # Map the write.table function
+r_data_frame = ro.r["data.frame"] # Map the write.table function
+r_as_numeric = ro.r["as.numeric"] # Map the write.table function
+
+class CTL(object):
+ def __init__(self):
+ print("Initialization of CTL")
+ #log = r_file("/tmp/genenetwork_ctl.log", open = "wt")
+ #r_sink(log) # Uncomment the r_sink() commands to log output from stdout/stderr to a file
+ #r_sink(log, type = "message")
+ r_library("ctl") # Load CTL - Should only be done once, since it is quite expensive
+ r_options(stringsAsFactors = False)
+ print("Initialization of CTL done, package loaded in R session")
+ self.r_CTLscan = ro.r["CTLscan"] # Map the CTLscan function
+ self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function
+ self.r_lineplot = ro.r["ctl.lineplot"] # Map the ctl.lineplot function
+ self.r_CTLsignificant = ro.r["CTLsignificant"] # Map the CTLsignificant function
+ self.r_CTLnetwork = ro.r["CTLnetwork"] # Map the CTLnetwork function
+ self.r_CTLprofiles = ro.r["CTLprofiles"] # Map the CTLprofiles function
+ self.r_plotCTLobject = ro.r["plot.CTLobject"] # Map the CTLsignificant function
+ print("Obtained pointers to CTL functions")
+
+ def run_analysis(self, requestform):
+ print("Starting CTL analysis on dataset")
+ self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
+ self.trait_db_list = [x for x in self.trait_db_list if x]
+
+ print("strategy:", requestform.get("strategy"))
+ strategy = requestform.get("strategy")
+
+ print("nperm:", requestform.get("nperm"))
+ nperm = int(requestform.get("nperm"))
+
+ print("parametric:", requestform.get("parametric"))
+ parametric = bool(requestform.get("parametric"))
+
+ print("significance:", requestform.get("significance"))
+ significance = float(requestform.get("significance"))
+
+ # Get the name of the .geno file belonging to the first phenotype
+ datasetname = self.trait_db_list[0].split(":")[1]
+ dataset = data_set.create_dataset(datasetname)
+
+ genofilelocation = locate(dataset.group.name + ".geno", "genotype")
+ parser = genofile_parser.ConvertGenoFile(genofilelocation)
+ parser.process_csv()
+
+ # Create a genotype matrix
+ individuals = parser.individuals
+ markers = []
+ markernames = []
+ for marker in parser.markers:
+ markernames.append(marker["name"])
+ markers.append(marker["genotypes"])
+
+ genotypes = list(itertools.chain(*markers))
+ print(len(genotypes) / len(individuals), "==", len(parser.markers))
+
+ rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True))
+
+ # Create a phenotype matrix
+ traits = []
+ for trait in self.trait_db_list:
+ print("retrieving data for", trait)
+ if trait != "":
+ ts = trait.split(':')
+ gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1])
+ gt.retrieve_sample_data(individuals)
+ for ind in individuals:
+ if ind in gt.data.keys():
+ traits.append(gt.data[ind].value)
+ else:
+ traits.append("-999")
+
+ rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True))
+
+ # Use a data frame to store the objects
+ rPheno = r_data_frame(rPheno)
+ rGeno = r_data_frame(rGeno)
+
+ # Debug: Print the genotype and phenotype files to disk
+ #r_write_table(rGeno, "~/outputGN/geno.csv")
+ #r_write_table(rPheno, "~/outputGN/pheno.csv")
+
+ # Perform the CTL scan
+ res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, ncores = 6)
+
+ # Get significant interactions
+ significant = self.r_CTLsignificant(res, significance = significance)
+
+ # Create an image for output
+ self.results = {}
+ self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
+ self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
+
+ self.results['ctlresult'] = significant
+ self.results['requestform'] = requestform # Store the user specified parameters for the output page
+
+ # Create the lineplot
+ r_png(self.results['imgloc1'], width=1000, height=600)
+ self.r_lineplot(res, significance = significance)
+ r_dev_off()
+
+ n = 2
+ for trait in self.trait_db_list:
+ # Create the QTL like CTL plots
+ self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
+ self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
+ r_png(self.results['imgloc' + str(n)], width=1000, height=600)
+ self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait)
+ r_dev_off()
+ n = n + 1
+
+ # Flush any output from R
+ sys.stdout.flush()
+
+ def loadImage(self, path, name):
+ print("pre-loading imgage results:", self.results[path])
+ imgfile = open(self.results[path], 'rb')
+ imgdata = imgfile.read()
+ imgB64 = imgdata.encode("base64")
+ bytesarray = array.array('B', imgB64)
+ self.results[name] = bytesarray
+
+ def render_image(self, results):
+ self.loadImage("imgloc1", "imgdata1")
+ n = 2
+ for trait in self.trait_db_list:
+ self.loadImage("imgloc" + str(n), "imgdata" + str(n))
+ n = n + 1
+
+ def process_results(self, results):
+ print("Processing CTL output")
+ template_vars = {}
+ template_vars["results"] = self.results
+ self.render_image(self.results)
+ sys.stdout.flush()
+ return(dict(template_vars))
+
diff --git a/wqflask/wqflask/database.py b/wqflask/wqflask/database.py
index 159c5d6c..2f544d44 100755
--- a/wqflask/wqflask/database.py
+++ b/wqflask/wqflask/database.py
@@ -24,8 +24,9 @@ def init_db():
# you will have to import them first before calling init_db()
#import yourapplication.models
import wqflask.model
- print("Creating all..")
+ print("database.py: Creating all model metadata..")
Base.metadata.create_all(bind=engine)
- print("Done creating all...")
+ print("database.py: Done creating all model metadata...")
+ print("Point your browser at http://localhost:5003/")
init_db()
diff --git a/wqflask/wqflask/heatmap/heatmap.py b/wqflask/wqflask/heatmap/heatmap.py
index 40f518f0..2445b37f 100644
--- a/wqflask/wqflask/heatmap/heatmap.py
+++ b/wqflask/wqflask/heatmap/heatmap.py
@@ -26,13 +26,12 @@ import reaper
from base.trait import GeneralTrait
from base import data_set
from base import species
-from base import webqtlConfig
-from utility import webqtlUtil
# from wqflask.my_pylmm.pyLMM import lmm
# from wqflask.my_pylmm.pyLMM import input
from utility import helper_functions
from utility import Plot, Bunch
from utility import temp_data
+from utility.tools import PYLMM_COMMAND
from MySQLdb import escape_string as escape
@@ -214,7 +213,7 @@ class Heatmap(object):
#Redis.expire(key, 60*60)
#print("before printing command")
#
- #command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ #command = 'python lmm.py --key {} --species {}'.format(key,
# "other")
#print("command is:", command)
#print("after printing command")
@@ -273,7 +272,7 @@ class Heatmap(object):
Redis.expire(key, 60*60)
print("before printing command")
- command = 'python /home/zas1024/gene/wqflask/wqflask/my_pylmm/pyLMM/lmm.py --key {} --species {}'.format(key,
+ command = PYLMM_COMMAND+' --key {} --species {}'.format(key,
"other")
print("command is:", command)
print("after printing command")
diff --git a/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py b/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py
index ec9aa29c..f45ec0c4 100755
--- a/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py
+++ b/wqflask/wqflask/interval_analyst/IntervalAnalystPage.py
@@ -45,40 +45,40 @@ class IntervalAnalystPage(templatePage):
#A dictionary that lets us map the html form names "txStart_mm6" -> "Mb Start (mm8)"
#the first item is the short name (column headers) and the second item is the long name (dropdown list)
# [short name, long name, category]
- columnNames = {"GeneSymbol" : ["Gene", "Gene Name", 'gene'],
+ columnNames = {"GeneSymbol" : ["Gene", "Gene Name", 'gene'],
"GeneDescription" : ["Description", "Gene Description", 'species'],
- 'GeneNeighborsCount' : ["Neighbors", "Gene Neighbors", 'gene'],
- 'GeneNeighborsRange' : ["Neighborhood", "Gene Neighborhood (Mb)", 'gene'],
- 'GeneNeighborsDensity' : ["Gene Density", "Gene Density (Neighbors/Mb)", 'gene'],
+ 'GeneNeighborsCount' : ["Neighbors", "Gene Neighbors", 'gene'],
+ 'GeneNeighborsRange' : ["Neighborhood", "Gene Neighborhood (Mb)", 'gene'],
+ 'GeneNeighborsDensity' : ["Gene Density", "Gene Density (Neighbors/Mb)", 'gene'],
"ProteinID" : ["Prot ID", "Protein ID", 'protein'],
- "Chromosome" : ["Chr", "Chromosome", 'species'],
- "TxStart" : ["Start", "Mb Start", 'species'],
- "TxEnd" : ["End", "Mb End", 'species'],
- "GeneLength" : ["Length", "Kb Length", 'species'],
- "cdsStart" : ["CDS Start", "Mb CDS Start", 'species'],
+ "Chromosome" : ["Chr", "Chromosome", 'species'],
+ "TxStart" : ["Start", "Mb Start", 'species'],
+ "TxEnd" : ["End", "Mb End", 'species'],
+ "GeneLength" : ["Length", "Kb Length", 'species'],
+ "cdsStart" : ["CDS Start", "Mb CDS Start", 'species'],
"cdsEnd" : ["CDS End", "Mb CDS End", 'species'],
- "exonCount" : ["Num Exons", "Exon Count", 'species'],
- "exonStarts" : ["Exon Starts", "Exon Starts", 'species'],
- "exonEnds" : ["Exon Ends", "Exon Ends", 'species'],
- "Strand" : ["Strand", "Strand", 'species'],
+ "exonCount" : ["Num Exons", "Exon Count", 'species'],
+ "exonStarts" : ["Exon Starts", "Exon Starts", 'species'],
+ "exonEnds" : ["Exon Ends", "Exon Ends", 'species'],
+ "Strand" : ["Strand", "Strand", 'species'],
"GeneID" : ["Gene ID", "Gene ID", 'species'],
- "GenBankID" : ["GenBank", "GenBank ID", 'species'],
+ "GenBankID" : ["GenBank", "GenBank ID", 'species'],
"UnigenID" : ["Unigen", "Unigen ID", 'species'],
- "NM_ID" : ["NM ID", "NM ID", 'species'],
+ "NM_ID" : ["NM ID", "NM ID", 'species'],
"kgID" : ["kg ID", "kg ID", 'species'],
- "snpCount" : ["SNPs", "SNP Count", 'species'],
- "snpDensity" : ["SNP Density", "SNP Density", 'species'],
- "lrs" : ["LRS", "Likelihood Ratio Statistic", 'misc'],
- "lod" : ["LOD", "Likelihood Odds Ratio", 'misc'],
- "pearson" : ["Pearson", "Pearson Product Moment", 'misc'],
- "literature" : ["Lit Corr", "Literature Correlation", 'misc'],
+ "snpCount" : ["SNPs", "SNP Count", 'species'],
+ "snpDensity" : ["SNP Density", "SNP Density", 'species'],
+ "lrs" : ["LRS", "Likelihood Ratio Statistic", 'misc'],
+ "lod" : ["LOD", "Likelihood Odds Ratio", 'misc'],
+ "pearson" : ["Pearson", "Pearson Product Moment", 'misc'],
+ "literature" : ["Lit Corr", "Literature Correlation", 'misc'],
}
###Species Freeze
speciesFreeze = {'mouse':'mm9', 'rat':'rn3', 'human':'hg19'}
for key in speciesFreeze.keys():
speciesFreeze[speciesFreeze[key]] = key
-
+
def __init__(self, fd):
templatePage.__init__(self, fd)
@@ -86,7 +86,7 @@ class IntervalAnalystPage(templatePage):
fd.formdata['remote_ip'] = fd.remote_ip
if not self.openMysql():
return
-
+
self.species = fd.formdata.getvalue("species", "mouse")
try:
self.startMb = float(fd.formdata.getvalue("startMb"))
@@ -96,7 +96,7 @@ class IntervalAnalystPage(templatePage):
self.endMb = float(fd.formdata.getvalue("endMb"))
except:
self.endMb = self.startMb + 10
-
+
self.Chr = fd.formdata.getvalue("chromosome", "1")
self.xls = fd.formdata.getvalue("xls", "1")
try:
@@ -107,38 +107,38 @@ class IntervalAnalystPage(templatePage):
self.diffColDefault = self.diffCol = []
if self.species != 'mouse':
self.diffColDefault = [2, 3]#default is B6 and D2 for other species
-
+
controlFrm, dispFields = self.genControlForm(fd)
geneTable, filename = self.genGeneTable(fd, dispFields)
-
+
infoTD = HT.TD(width=400, valign= "top")
- infoTD.append(HT.Paragraph("Interval Analyst : Chr %s" % self.Chr, Class="title"),
- HT.Strong("Species : "), self.species.title(), HT.BR(),
- HT.Strong("Database : "), "UCSC %s" % self.speciesFreeze[self.species], HT.BR(),
- HT.Strong("Range : "), "%2.6f Mb - %2.6f Mb" % (self.startMb, self.endMb), HT.BR(),
+ infoTD.append(HT.Paragraph("Interval Analyst : Chr %s" % self.Chr, Class="title"),
+ HT.Strong("Species : "), self.species.title(), HT.BR(),
+ HT.Strong("Database : "), "UCSC %s" % self.speciesFreeze[self.species], HT.BR(),
+ HT.Strong("Range : "), "%2.6f Mb - %2.6f Mb" % (self.startMb, self.endMb), HT.BR(),
)
if filename:
infoTD.append(HT.BR(), HT.BR(), HT.Href(text="Download", url = "/tmp/" + filename, Class="normalsize")
, " output in MS excel format.")
-
+
mainTable = HT.TableLite(HT.TR(infoTD, HT.TD(controlFrm, Class="doubleBorder", width=400), HT.TD("&nbsp;", width="")), cellpadding=10)
mainTable.append(HT.TR(HT.TD(geneTable, colspan=3)))
self.dict['body'] = HT.TD(mainTable)
self.dict['title'] = "Interval Analyst"
-
+
def genGeneTable(self, fd, dispFields):
filename = ""
if self.xls:
#import pyXLWriter as xl
filename = "IntAn_Chr%s_%2.6f-%2.6f" % (self.Chr, self.startMb, self.endMb)
filename += ".xls"
-
+
# Create a new Excel workbook
workbook = xl.Writer(os.path.join(webqtlConfig.TMPDIR, filename))
worksheet = workbook.add_worksheet()
titleStyle = workbook.add_format(align = 'left', bold = 0, size=18, border = 1, border_color="gray")
headingStyle = workbook.add_format(align = 'center', bold = 1, size=13, fg_color = 0x1E, color="white", border = 1, border_color="gray")
-
+
##Write title Info
worksheet.write([0, 0], "GeneNetwork Interval Analyst Table", titleStyle)
worksheet.write([1, 0], "%s%s" % (webqtlConfig.PORTADDR, os.path.join(webqtlConfig.CGIDIR, self._scriptfile)))
@@ -148,12 +148,12 @@ class IntervalAnalystPage(templatePage):
worksheet.write([4, 0], "Search by : %s" % fd.formdata['remote_ip'])
worksheet.write([5, 0], "view region : Chr %s %2.6f - %2.6f Mb" % (self.Chr, self.startMb, self.endMb))
nTitleRow = 7
-
+
geneTable = HT.TableLite(Class="collap", cellpadding=5)
headerRow = HT.TR(HT.TD(" ", Class="fs13 fwb ffl b1 cw cbrb", width="1"))
if self.xls:
worksheet.write([nTitleRow, 0], "Index", headingStyle)
-
+
for ncol, column in enumerate(dispFields):
if len(column) == 1:
headerRow.append(HT.TD(self.columnNames[column[0]][0], Class="fs13 fwb ffl b1 cw cbrb", NOWRAP=1,align="Center"))
@@ -162,24 +162,24 @@ class IntervalAnalystPage(templatePage):
worksheet.write([nTitleRow, ncol+1], colTitle, headingStyle)
worksheet.set_column([ncol+1, ncol+1], 2*len(colTitle))
else:
- headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(), " (%s)" % self.speciesFreeze[column[1]],
+ headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(), " (%s)" % self.speciesFreeze[column[1]],
Class="fs13 fwb ffl b1 cw cbrb", NOWRAP=1, align="Center"))
if self.xls:
colTitle = self.columnNames[column[0]][0] + " (%s)" % self.speciesFreeze[column[1]]
worksheet.write([nTitleRow, ncol+1], colTitle, headingStyle)
worksheet.set_column([ncol+1, ncol+1], 2*len(colTitle))
- #headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(),
- # "(%s %s)" % (column[1].title(), self.speciesFreeze[column[1]]),
+ #headerRow.append(HT.TD(self.columnNames[column[0]][0], HT.BR(),
+ # "(%s %s)" % (column[1].title(), self.speciesFreeze[column[1]]),
# Class="colorBlue", NOWRAP=1, align="Center"))
geneTable.append(headerRow)
-
+
geneCol = GeneUtil.loadGenes(self.cursor, self.Chr, self.diffColDefault, self.startMb, self.endMb, species=self.species)
for gIndex, theGO in enumerate(geneCol):
geneRow = HT.TR(HT.TD(gIndex+1, Class="fs12 fwn b1", align="right"))
if self.xls:
nTitleRow += 1
worksheet.write([nTitleRow, 0], gIndex + 1)
-
+
for ncol, column in enumerate(dispFields):
if len(column) == 1 or column[1]== self.species:
keyValue = ""
@@ -196,17 +196,17 @@ class IntervalAnalystPage(templatePage):
curGO = theGO[subGO]
if theGO[subGO].has_key(fieldName):
keyValue = theGO[subGO][fieldName]
-
+
if self.xls:
worksheet.write([nTitleRow, ncol+1], keyValue)
geneRow.append(self.formatTD(keyValue, fieldName, curSpecies, curGO))
-
+
geneTable.append(geneRow)
-
+
if self.xls:
workbook.close()
return geneTable, filename
-
+
def formatTD(self, keyValue, fieldName, Species, theGO):
if keyValue is None:
keyValue = ""
@@ -219,7 +219,7 @@ class IntervalAnalystPage(templatePage):
keyValue = ""
return HT.TD(keyValue, Class="fs12 fwn b1", width=300)
elif fieldName in ("GeneSymbol"):
- webqtlLink = HT.Href("./%s?cmd=sch&gene=%s&alias=1&species=%s" % (webqtlConfig.SCRIPTFILE, keyValue, Species),
+ webqtlLink = HT.Href("./%s?cmd=sch&gene=%s&alias=1&species=%s" % (webqtlConfig.SCRIPTFILE, keyValue, Species),
HT.Image("/images/webqtl_search.gif", border=0, valign="top"), target="_blank")
if theGO['GeneID']:
geneSymbolLink = HT.Href(webqtlConfig.NCBI_LOCUSID % theGO['GeneID'], keyValue, Class="normalsize", target="_blank")
@@ -236,8 +236,8 @@ class IntervalAnalystPage(templatePage):
return HT.TD(keyValue, Class="fs12 fwn b1",align="right")
elif fieldName in ("snpCount"):
if keyValue:
- snpString = HT.Href(url="%s&chr=%s&start=%s&end=%s&geneName=%s&s1=%d&s2=%d" % (os.path.join(webqtlConfig.CGIDIR, 'main.py?FormID=snpBrowser'),
- theGO["Chromosome"], theGO["TxStart"], theGO["TxEnd"], theGO["GeneSymbol"], self.diffColDefault[0], self.diffColDefault[1]),
+ snpString = HT.Href(url="%s&chr=%s&start=%s&end=%s&geneName=%s&s1=%d&s2=%d" % (os.path.join(webqtlConfig.CGIDIR, 'main.py?FormID=snpBrowser'),
+ theGO["Chromosome"], theGO["TxStart"], theGO["TxEnd"], theGO["GeneSymbol"], self.diffColDefault[0], self.diffColDefault[1]),
text=theGO["snpCount"], target="_blank", Class="normalsize")
else:
snpString = keyValue
@@ -252,13 +252,13 @@ class IntervalAnalystPage(templatePage):
return HT.TD(keyValue, Class="fs12 fwn b1",NOWRAP=1)
else:
return HT.TD(keyValue, Class="fs12 fwn b1",NOWRAP=1,align="right")
-
+
def genControlForm(self, fd):
##desc GeneList
self.cursor.execute("Desc GeneList")
GeneListFields = self.cursor.fetchall()
GeneListFields = map(lambda X: X[0], GeneListFields)
-
+
#group columns by category--used for creating the dropdown list of possible columns
categories = {}
for item in self.columnNames.keys():
@@ -267,7 +267,7 @@ class IntervalAnalystPage(templatePage):
categories[category[-1]] = [item ]
else:
categories[category[-1]] = categories[category[-1]]+[item]
-
+
##List All Species in the Gene Table
speciesDict = {}
self.cursor.execute("select Species.Name, GeneList.SpeciesId from Species, GeneList where \
@@ -292,34 +292,34 @@ class IntervalAnalystPage(templatePage):
pass
AppliedField.append(item2)
categories[specName] = AppliedField
-
+
categoriesOrder += ['misc']
-
+
############################################################
## Create the list of possible columns for the dropdown list
############################################################
allColumnsList = HT.Select(name="allColumns", Class="snpBrowserDropBox")
-
+
for category in categoriesOrder:
allFields = categories[category]
if allFields:
geneOpt = HT.Optgroup(label=category.title())
for item in allFields:
if category in self.speciesFreeze.keys():
- geneOpt.append(("%s (%s %s)" % (self.columnNames[item][1], category.title(), self.speciesFreeze[category]),
+ geneOpt.append(("%s (%s %s)" % (self.columnNames[item][1], category.title(), self.speciesFreeze[category]),
"%s__%s" % (item, self.speciesFreeze[category])))
else:
geneOpt.append((self.columnNames[item][1], item))
geneOpt.sort()
allColumnsList.append(geneOpt)
-
+
######################################
## Create the list of selected columns
######################################
-
+
#cols contains the value of all the selected columns
submitCols = cols = fd.formdata.getvalue("columns", "default")
-
+
if cols == "default":
if self.species=="mouse": #these are the same columns that are shown on intervalPage.py
cols = ['GeneSymbol', 'GeneDescription', 'Chromosome', 'TxStart', 'Strand', 'GeneLength', 'GeneID', 'NM_ID', 'snpCount', 'snpDensity']
@@ -331,12 +331,12 @@ class IntervalAnalystPage(templatePage):
else:
if type(cols)==type(""):
cols = [cols]
-
+
colsLst = []
dispFields = []
for column in cols:
if submitCols == "default" and column not in ('GeneSymbol') and (column in GeneListFields or column in speciesField):
- colsLst.append(("%s (%s %s)" % (self.columnNames[column][1], self.species.title(), self.speciesFreeze[self.species]),
+ colsLst.append(("%s (%s %s)" % (self.columnNames[column][1], self.species.title(), self.speciesFreeze[self.species]),
"%s__%s" % (column, self.speciesFreeze[self.species])))
dispFields.append([column, self.species])
else:
@@ -346,17 +346,17 @@ class IntervalAnalystPage(templatePage):
dispFields.append([column])
else:
thisSpecies = self.speciesFreeze[column2[1]]
- colsLst.append(("%s (%s %s)" % (self.columnNames[column2[0]][1], thisSpecies.title(), column2[1]),
+ colsLst.append(("%s (%s %s)" % (self.columnNames[column2[0]][1], thisSpecies.title(), column2[1]),
column))
dispFields.append((column2[0], thisSpecies))
selectedColumnsList = HT.Select(name="columns", Class="snpBrowserSelectBox", multiple="true", data=colsLst, size=6)
-
+
##########################
## Create the columns form
- ##########################
+ ##########################
columnsForm = HT.Form(name="columnsForm", submit=HT.Input(type='hidden'), cgi=os.path.join(webqtlConfig.CGIDIR, self._scriptfile), enctype="multipart/form-data")
columnsForm.append(HT.Input(type="hidden", name="fromdatabase", value= fd.formdata.getvalue("fromdatabase", "unknown")))
- columnsForm.append(HT.Input(type="hidden", name="species", value=self.species))
+ columnsForm.append(HT.Input(type="hidden", name="species", value=self.species))
if self.diffCol:
columnsForm.append(HT.Input(type="hidden", name="s1", value=self.diffCol[0]))
columnsForm.append(HT.Input(type="hidden", name="s2", value=self.diffCol[1]))
@@ -366,8 +366,8 @@ class IntervalAnalystPage(templatePage):
removeButton = HT.Input(type="button", name="remove", value="Remove", Class="button", onClick="removeFromList(this.form.columns.selectedIndex, this.form.columns)")
upButton = HT.Input(type="button", name="up", value="Up", Class="button", onClick="swapOptions(this.form.columns.selectedIndex, this.form.columns.selectedIndex-1, this.form.columns)")
downButton = HT.Input(type="button", name="down", value="Down", Class="button", onClick="swapOptions(this.form.columns.selectedIndex, this.form.columns.selectedIndex+1, this.form.columns)")
- clearButton = HT.Input(type="button", name="clear", value="Clear", Class="button", onClick="deleteAllElements(this.form.columns)")
- submitButton = HT.Input(type="submit", value="Refresh", Class="button", onClick="selectAllElements(this.form.columns)")
+ clearButton = HT.Input(type="button", name="clear", value="Clear", Class="button", onClick="deleteAllElements(this.form.columns)")
+ submitButton = HT.Input(type="submit", value="Refresh", Class="button", onClick="selectAllElements(this.form.columns)")
selectChrBox = HT.Select(name="chromosome")
self.cursor.execute("""
@@ -375,11 +375,11 @@ class IntervalAnalystPage(templatePage):
Chr_Length.Name, Length from Chr_Length, Species
where
Chr_Length.SpeciesId = Species.Id AND
- Species.Name = '%s'
+ Species.Name = '%s'
Order by
Chr_Length.OrderId
""" % self.species)
-
+
results = self.cursor.fetchall()
for chrInfo in results:
selectChrBox.append((chrInfo[0], chrInfo[0]))
@@ -401,5 +401,5 @@ class IntervalAnalystPage(templatePage):
#columnsForm.append(HT.Input(type="hidden", name="sort", value=diffCol),
# HT.Input(type="hidden", name="identification", value=identification),
# HT.Input(type="hidden", name="traitInfo", value=traitInfo))
-
+
return columnsForm, dispFields
diff --git a/wqflask/wqflask/marker_regression/MarkerRegressionPage.py b/wqflask/wqflask/marker_regression/MarkerRegressionPage.py
index d02d80b3..455fcf95 100755
--- a/wqflask/wqflask/marker_regression/MarkerRegressionPage.py
+++ b/wqflask/wqflask/marker_regression/MarkerRegressionPage.py
@@ -140,7 +140,7 @@ class MarkerRegressionPage(templatePage):
intCanvas = pid.PILCanvas(size=(self.graphWidth,self.graphHeight))
gifmap = self.plotIntMappingForPLINK(fd, intCanvas, startMb = self.startMb, endMb = self.endMb, plinkResultDict=plinkResultDict)
- intCanvas.save(os.path.join(webqtlConfig.IMGDIR, filename), format='png')
+ intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, filename), format='png')
intImg=HT.Image('/image/'+filename+'.png', border=0, usemap='#WebQTLImageMap')
TD_LR = HT.TR(HT.TD(HT.Blockquote(gifmap,intImg, HT.P()), bgColor='#eeeeee', height = 200))
@@ -249,7 +249,7 @@ class MarkerRegressionPage(templatePage):
intCanvas = pid.PILCanvas(size=(self.graphWidth,self.graphHeight))
gifmap = self.plotIntMapping(fd, intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= "")
filename= webqtlUtil.genRandStr("Itvl_")
- intCanvas.save(os.path.join(webqtlConfig.IMGDIR, filename), format='png')
+ intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, filename), format='png')
intImg=HT.Image('/image/'+filename+'.png', border=0, usemap='#WebQTLImageMap')
################################################################
@@ -458,7 +458,7 @@ class MarkerRegressionPage(templatePage):
#plotBar(myCanvas,10,10,390,290,LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test',identification=fd.identification)
Plot.plotBar(myCanvas, LRSArray,XLabel='LRS',YLabel='Frequency',title=' Histogram of Permutation Test')
filename= webqtlUtil.genRandStr("Reg_")
- myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif')
+ myCanvas.save(webqtlConfig.GENERATED_IMAGE_DIR+filename, format='gif')
img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test')
if fd.suggestive == None:
diff --git a/wqflask/wqflask/marker_regression/gemma_mapping.py b/wqflask/wqflask/marker_regression/gemma_mapping.py
index cfcd4783..8fb086c1 100644
--- a/wqflask/wqflask/marker_regression/gemma_mapping.py
+++ b/wqflask/wqflask/marker_regression/gemma_mapping.py
@@ -1,9 +1,7 @@
import os
from base import webqtlConfig
-from utility.tools import gemma_command
-
-GEMMA_PATH,GEMMA_COMMAND = gemma_command()
+from utility.tools import GEMMA_COMMAND
def run_gemma(this_dataset, samples, vals):
"""Generates p-values for each marker using GEMMA"""
@@ -12,8 +10,11 @@ def run_gemma(this_dataset, samples, vals):
gen_pheno_txt_file(this_dataset, samples, vals)
- os.chdir(GEMMA_PATH)
+ # Don't do this!
+ # os.chdir("{}gemma".format(webqtlConfig.HTMLPATH))
+ # use GEMMA_RUN in the next one, create a unique temp file
+
gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/output/%s.cXX.txt -lmm 1 -o %s_output' % (GEMMA_PATH,
this_dataset.group.name,
GEMMA_PATH,
@@ -46,4 +47,4 @@ def parse_gemma_output(this_dataset):
p_values.append(float(line.split("\t")[10]))
#print("p_values: ", p_values)
- return included_markers, p_values \ No newline at end of file
+ return included_markers, p_values
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index fa439b55..b56b179b 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -30,21 +30,16 @@ from flask import Flask, g
from base.trait import GeneralTrait
from base import data_set
from base import species
-from base import webqtlConfig
from utility import webqtlUtil
from utility import helper_functions
from utility import Plot, Bunch
from utility import temp_data
from utility.benchmark import Bench
-from utility.tools import pylmm_command, plink_command, gemma_command
from wqflask.marker_regression import gemma_mapping
-#from wqflask.marker_regression import qtl_reaper_mapping
-#from wqflask.marker_regression import plink_mapping
-#from wqflask.marker_regression import rqtl_mapping
-PYLMM_PATH,PYLMM_COMMAND = pylmm_command()
-PLINK_PATH,PLINK_COMMAND = plink_command()
-GEMMA_PATH,GEMMA_COMMAND = gemma_command()
+from utility.tools import locate, locate_ignore_error, PYLMM_COMMAND, GEMMA_COMMAND, PLINK_COMMAND
+from utility.external import shell
+from base.webqtlConfig import TMPDIR, GENERATED_TEXT_DIR
class MarkerRegression(object):
@@ -299,22 +294,14 @@ class MarkerRegression(object):
included_markers, p_values = self.parse_gemma_output()
self.dataset.group.get_specified_markers(markers = included_markers)
-
- #for marker in self.dataset.group.markers.markers:
- # if marker['name'] not in included_markers:
- # print("marker:", marker)
- # self.dataset.group.markers.markers.remove(marker)
- # #del self.dataset.group.markers.markers[marker]
-
self.dataset.group.markers.add_pvalues(p_values)
-
return self.dataset.group.markers.markers
-
def parse_gemma_output(self):
included_markers = []
p_values = []
- with open("/home/zas1024/gene/web/gemma/output/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file:
+ # Use a temporary file name here!
+ with open(webqtlConfig.GENERATED_TEXT_DIR+"/{}_output.assoc.txt".format(self.dataset.group.name)) as output_file:
for line in output_file:
if line.startswith("chr"):
continue
@@ -327,37 +314,18 @@ class MarkerRegression(object):
def gen_pheno_txt_file(self):
"""Generates phenotype file for GEMMA"""
-
- #with open("/home/zas1024/gene/web/gemma/tmp_pheno/{}.txt".format(filename), "w") as outfile:
- # for sample, i in enumerate(self.samples):
- # print("sample:" + str(i))
- # print("self.vals[i]:" + str(self.vals[sample]))
- # outfile.write(str(i) + "\t" + str(self.vals[sample]) + "\n")
-
- with open("/home/zas1024/gene/web/gemma/{}.fam".format(self.dataset.group.name), "w") as outfile:
+ with open(webqtlConfig.GENERATED_TEXT_DIR+"{}.fam".format(self.dataset.group.name), "w") as outfile:
for i, sample in enumerate(self.samples):
outfile.write(str(sample) + " " + str(sample) + " 0 0 0 " + str(self.vals[i]) + "\n")
-
- #def gen_plink_for_gemma(self, filename):
- #
- # make_bed = "/home/zas1024/plink/plink --file /home/zas1024/plink/%s --noweb --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --out %s%s --make-bed" % (webqtlConfig.HTMLPATH,
- # webqtlConfig.HTMLPATH,
- # self.dataset.group.name,
- # webqtlConfig.TMPDIR,
- # filename,
- # webqtlConfig.TMPDIR,
- # filename)
- #
- #
def run_rqtl_plink(self):
- os.chdir("/home/zas1024/plink")
+ # os.chdir("") never do this inside a webserver!!
output_filename = webqtlUtil.genRandStr("%s_%s_"%(self.dataset.group.name, self.this_trait.name))
self.gen_pheno_txt_file_plink(pheno_filename = output_filename)
- rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)
+ rqtl_command = './plink --noweb --ped %s.ped --no-fid --no-parents --no-sex --no-pheno --map %s.map --pheno %s/%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (self.dataset.group.name, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
os.system(rqtl_command)
@@ -414,10 +382,11 @@ class MarkerRegression(object):
calc_genoprob = ro.r["calc.genoprob"] # Map the calc.genoprob function
read_cross = ro.r["read.cross"] # Map the read.cross function
write_cross = ro.r["write.cross"] # Map the write.cross function
- GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the GENOtoCSVR function
+ GENOtoCSVR = ro.r["GENOtoCSVR"] # Map the local GENOtoCSVR function
- genofilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".geno"
- crossfilelocation = webqtlConfig.HTMLPATH + "genotypes/" + self.dataset.group.name + ".cross"
+ crossname = self.dataset.group.name
+ genofilelocation = locate(crossname + ".geno", "genotype")
+ crossfilelocation = TMPDIR + crossname + ".cross"
#print("Conversion of geno to cross at location:", genofilelocation, " to ", crossfilelocation)
@@ -444,7 +413,7 @@ class MarkerRegression(object):
#print("Pair scan results:", result_data_frame)
self.pair_scan_filename = webqtlUtil.genRandStr("scantwo_") + ".png"
- png(file=webqtlConfig.TMPDIR+self.pair_scan_filename)
+ png(file=TMPDIR+self.pair_scan_filename)
plot(result_data_frame)
dev_off()
@@ -554,8 +523,8 @@ class MarkerRegression(object):
self.gen_pheno_txt_file_plink(pheno_filename = plink_output_filename)
- plink_command = PLINK_COMMAND + ' --noweb --bed %s/%s.bed --bim %s/%s.bim --fam %s/%s.fam --no-fid --no-parents --no-sex --no-pheno --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, webqtlConfig.TMPDIR, plink_output_filename, self.this_trait.name, self.maf, webqtlConfig.TMPDIR, plink_output_filename)
- #print("plink_command:", plink_command)
+ plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % (PLINK_PATH, self.dataset.group.name, PLINK_PATH, self.dataset.group.name, TMPDIR, plink_output_filename, self.this_trait.name, self.maf, TMPDIR, plink_output_filename)
+ print("plink_command:", plink_command)
os.system(plink_command)
@@ -576,7 +545,7 @@ class MarkerRegression(object):
def gen_pheno_txt_file_plink(self, pheno_filename = ''):
ped_sample_list = self.get_samples_from_ped_file()
- output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")
+ output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
header = 'FID\tIID\t%s\n' % self.this_trait.name
output_file.write(header)
@@ -611,7 +580,7 @@ class MarkerRegression(object):
def gen_pheno_txt_file_rqtl(self, pheno_filename = ''):
ped_sample_list = self.get_samples_from_ped_file()
- output_file = open("%s%s.txt" % (webqtlConfig.TMPDIR, pheno_filename), "wb")
+ output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
header = 'FID\tIID\t%s\n' % self.this_trait.name
output_file.write(header)
@@ -756,7 +725,7 @@ class MarkerRegression(object):
threshold_p_value = 0.01
- result_fp = open("%s%s.qassoc"% (webqtlConfig.TMPDIR, output_filename), "rb")
+ result_fp = open("%s%s.qassoc"% (TMPDIR, output_filename), "rb")
header_line = result_fp.readline()# read header line
line = result_fp.readline()
@@ -866,9 +835,7 @@ class MarkerRegression(object):
Redis.expire(key, 60*60)
command = PYLMM_COMMAND+' --key {} --species {}'.format(key,"other")
-
- os.system(command)
-
+ shell(command)
json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
results = json.loads(json_results[1])
@@ -943,12 +910,11 @@ class MarkerRegression(object):
Redis.expire(key, 60*60)
print("before printing command")
- command = PYLMM_COMMAND + ' --key {} --species {}'.format(key,
- "other")
+ command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other")
print("command is:", command)
print("after printing command")
- os.system(command)
+ shell(command)
#t_stats, p_values = lmm.run(key)
#lmm.run(key)
@@ -985,8 +951,7 @@ class MarkerRegression(object):
#def gen_human_results(self, pheno_vector, tempdata):
def gen_human_results(self, pheno_vector, key, temp_uuid):
- file_base = os.path.join(webqtlConfig.PYLMM_PATH, self.dataset.group.name)
- print("file_base:", file_base)
+ file_base = locate(self.dataset.group.name,"mapping")
plink_input = input.plink(file_base, type='b')
input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")
@@ -1078,7 +1043,11 @@ class MarkerRegression(object):
return trimmed_genotype_data
def create_snp_iterator_file(group):
- plink_file_base = os.path.join(webqtlConfig.PYLMM_PATH, group)
+ """
+ This function is only called by main below
+ """
+ raise Exception("Paths are undefined here")
+ plink_file_base = os.path.join(TMPDIR, group)
plink_input = input.plink(plink_file_base, type='b')
data = dict(plink_input = list(plink_input),
@@ -1139,5 +1108,3 @@ def get_markers_from_csv(included_markers, p_values, group_name):
if __name__ == '__main__':
import cPickle as pickle
- import gzip
- create_snp_iterator_file("HLC")
diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
index e0007455..11e312b2 100644
--- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py
+++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
@@ -38,21 +38,11 @@ from htmlgen import HTMLgen2 as HT
from base import webqtlConfig
from base.GeneralObject import GeneralObject
-#from base.webqtlTrait import webqtlTrait
-#from base.templatePage import templatePage
from utility import webqtlUtil
from utility import helper_functions
from utility import Plot
-#from utility.THCell import THCell
-#from utility.TDCell import TDCell
from wqflask.interval_analyst import GeneUtil
-#from dbFunction import webqtlDatabaseFunction
-
-#import logging
-#logging.basicConfig(filename="/tmp/gn_leiyan.log", level=logging.INFO)
-#_log = logging.getLogger("gn\web\webqtl\intervalMapping\IntervalMappingPage.py")
-
#########################################
# Inteval Mapping Plot Page
#########################################
@@ -178,8 +168,8 @@ class MarkerRegression(object):
self.species = start_vars['species']
#Needing for form submission when doing single chr mapping or remapping after changing options
- self.vals = start_vars['vals']
- self.mapping_method = start_vars['mapping_method']
+ self.vals = start_vars['vals']
+ self.mapping_method = start_vars['mapping_method']
if self.mapping_method == "rqtl_geno":
self.mapmethod_rqtl_geno = start_vars['method']
self.mapmodel_rqtl_geno = start_vars['model']
@@ -232,7 +222,7 @@ class MarkerRegression(object):
self.significant = start_vars['significant']
else:
self.nperm = 0
-
+
if 'bootCheck' in start_vars.keys():
self.bootChecked = start_vars['bootCheck']
else:
@@ -308,7 +298,7 @@ class MarkerRegression(object):
if 'showSNP' in start_vars.keys():
self.SNPChecked = start_vars['showSNP']
else:
- self.SNPChecked = False
+ self.SNPChecked = False
if 'showGenes' in start_vars.keys():
self.geneChecked = start_vars['showGenes']
else:
@@ -321,7 +311,7 @@ class MarkerRegression(object):
self.endMb = float(start_vars['endMb'])
except:
self.endMb = -1
- try:
+ try:
self.lrsMax = float(start_vars['lrsMax'])
except:
self.lrsMax = 0
@@ -363,7 +353,7 @@ class MarkerRegression(object):
self.ChrList.append((indChr.name, i))
-
+
self.ChrLengthMbList = g.db.execute("""
Select
Length from Chr_Length, InbredSet
@@ -517,20 +507,19 @@ class MarkerRegression(object):
chrName = self.selectedChr
# Draw the genes for this chromosome / region of this chromosome
webqtldatabase = self.dataset.name
-
+
if self.dataset.group.species == "mouse":
- self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "mouse")
+ self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "mouse")
elif self.dataset.group.species == "rat":
self.geneCol = GeneUtil.loadGenes(chrName, self.diffCol, self.startMb, self.endMb, webqtldatabase, "rat")
-
if self.geneCol and self.intervalAnalystChecked:
#######################################################################
#Nick use GENEID as RefGene to get Literature Correlation Informations#
#For Interval Mapping, Literature Correlation isn't useful, so skip it#
#through set GENEID is None #
#######################################################################
-
+
#GENEID = fd.formdata.getvalue('GeneId') or None
GENEID = None
@@ -538,7 +527,7 @@ class MarkerRegression(object):
self.geneTable(self.geneCol, GENEID)
#geneTable = self.geneTable(self.geneCol, GENEID)
#geneTableContainer.append(geneTable)
-
+
#mainfmName = webqtlUtil.genRandStr("fm_")
#tableForm = HT.Form(cgi=os.path.join(webqtlConfig.CGIDIR, webqtlConfig.SCRIPTFILE), enctype='multipart/form-data', name=mainfmName, submit=HT.Input(type='hidden'))
#tableForm.append(HT.Input(name='FormID', value='', type='hidden'))
@@ -553,22 +542,22 @@ class MarkerRegression(object):
#else:
showLocusForm = ""
intCanvas = pid.PILCanvas(size=(self.graphWidth, self.graphHeight))
- gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm)
+ gifmap = self.plotIntMapping(intCanvas, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm)
self.gifmap = gifmap.__str__()
#print("GIFMAP:", gifmap.__str__())
self.filename= webqtlUtil.genRandStr("Itvl_")
- intCanvas.save(os.path.join(webqtlConfig.IMGDIR, self.filename), format='jpeg')
+ intCanvas.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename), format='jpeg')
intImg=HT.Image('/image/'+self.filename+'.png', border=0, usemap='#WebQTLImageMap')
#Scales plot differently for high resolution
if self.draw2X:
intCanvasX2 = pid.PILCanvas(size=(self.graphWidth*2,self.graphHeight*2))
gifmapX2 = self.plotIntMapping(intCanvasX2, startMb = self.startMb, endMb = self.endMb, showLocusForm= showLocusForm, zoom=2)
- intCanvasX2.save(os.path.join(webqtlConfig.IMGDIR, self.filename+"X2"), format='png')
+ intCanvasX2.save(os.path.join(webqtlConfig.GENERATED_IMAGE_DIR, self.filename+"X2"), format='png')
#DLintImgX2=HT.Href(text='Download',url = '/image/'+self.filename+'X2.png', Class='smallsize', target='_blank')
-
+
#textUrl = self.writeQTL2Text(fd, self.filename)
################################################################
@@ -598,7 +587,7 @@ class MarkerRegression(object):
showLocusForm.append(intImg)
else:
showLocusForm = intImg
-
+
if self.permChecked and self.nperm > 0 and not self.multipleInterval and 0 < self.nperm:
self.perm_filename = self.drawPermutationHistogram()
#perm_text_file = self.permutationTextFile()
@@ -668,7 +657,7 @@ class MarkerRegression(object):
TD_LR.append(HT.Blockquote(tableForm))
self.body = TD_LR
-
+
#self.dict['body'] = TD_LR
#self.dict['title'] = "Mapping"
@@ -859,7 +848,7 @@ class MarkerRegression(object):
BootCoord = []
i = 0
startX = xLeftOffset
-
+
if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes
for j, _chr in enumerate(self.genotype):
BootCoord.append( [])
@@ -882,8 +871,8 @@ class MarkerRegression(object):
else:
Xc = startX + (_locus.cM-_chr[0].cM)*plotXScale
BootCoord[-1].append([Xc, self.bootResult[i]])
- i += 1
-
+ i += 1
+
#reduce bootResult
if self.selectedChr > -1:
maxBootBar = 80.0
@@ -941,7 +930,7 @@ class MarkerRegression(object):
bootScale = bootScale[:-1] + [highestPercent]
bootOffset = 50*fontZoom
- bootScaleFont=pid.Font(ttf="verdana",size=13*fontZoom,bold=0)
+ bootScaleFont=Font(ttf="verdana",size=13*fontZoom,bold=0)
canvas.drawRect(canvas.size[0]-bootOffset,yZero-bootHeightThresh,canvas.size[0]-bootOffset-15*zoom,yZero,fillColor = pid.yellow)
canvas.drawLine(canvas.size[0]-bootOffset+4, yZero, canvas.size[0]-bootOffset, yZero, color=pid.black)
canvas.drawString('0%' ,canvas.size[0]-bootOffset+10,yZero+5,font=bootScaleFont,color=pid.black)
@@ -1412,7 +1401,7 @@ class MarkerRegression(object):
if _strains[ii] in self.dataset.group.samplelist:
temp = GeneralObject(name=_strains[ii], value=_val)
smd.append(temp)
-
+
smd.sort(lambda A, B: cmp(A.value, B.value))
smd.reverse()
@@ -1567,14 +1556,14 @@ class MarkerRegression(object):
firstGene = 0
else:
lastGene = 0
-
+
for j, _geno in enumerate (self.genotype[0][1].genotype):
plotbxd=0
for item in smd:
if item.name == samplelist[j]:
- plotbxd=1
-
+ plotbxd=1
+
if (plotbxd == 1):
ind = 0
@@ -1621,28 +1610,28 @@ class MarkerRegression(object):
currentChromosome = self.genotype[0].name
i = 0
-
+
paddingTop = yTopOffset
ucscPaddingTop = paddingTop + self.WEBQTL_BAND_HEIGHT + self.BAND_SPACING
ensemblPaddingTop = ucscPaddingTop + self.UCSC_BAND_HEIGHT + self.BAND_SPACING
-
+
if zoom == 1:
for pixel in range(xLeftOffset, xLeftOffset + plotWidth, pixelStep):
-
+
calBase = self.kONE_MILLION*(startMb + (endMb-startMb)*(pixel-xLeftOffset-0.0)/plotWidth)
-
+
xBrowse1 = pixel
xBrowse2 = min(xLeftOffset + plotWidth, (pixel + pixelStep - 1))
-
+
WEBQTL_COORDS = "%d, %d, %d, %d" % (xBrowse1, paddingTop, xBrowse2, (paddingTop+self.WEBQTL_BAND_HEIGHT))
bandWidth = xBrowse2 - xBrowse1
WEBQTL_HREF = "javascript:rangeView('%s', %f, %f)" % (self.selectedChr - 1, max(0, (calBase-webqtlZoomWidth))/1000000.0, (calBase+webqtlZoomWidth)/1000000.0)
-
+
WEBQTL_TITLE = "Click to view this section of the genome in WebQTL"
gifmap.areas.append(HT.Area(shape='rect',coords=WEBQTL_COORDS,href=WEBQTL_HREF, title=WEBQTL_TITLE))
canvas.drawRect(xBrowse1, paddingTop, xBrowse2, (paddingTop + self.WEBQTL_BAND_HEIGHT), edgeColor=self.CLICKABLE_WEBQTL_REGION_COLOR, fillColor=self.CLICKABLE_WEBQTL_REGION_COLOR)
canvas.drawLine(xBrowse1, paddingTop, xBrowse1, (paddingTop + self.WEBQTL_BAND_HEIGHT), color=self.CLICKABLE_WEBQTL_REGION_OUTLINE_COLOR)
-
+
UCSC_COORDS = "%d, %d, %d, %d" %(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.UCSC_BAND_HEIGHT))
if self.dataset.group.species == "mouse":
UCSC_HREF = "http://genome.ucsc.edu/cgi-bin/hgTracks?db=%s&position=chr%s:%d-%d&hgt.customText=%s/snp/chr%s" % (self._ucscDb, self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases, webqtlConfig.PORTADDR, self.selectedChr)
@@ -1652,7 +1641,7 @@ class MarkerRegression(object):
gifmap.areas.append(HT.Area(shape='rect',coords=UCSC_COORDS,href=UCSC_HREF, title=UCSC_TITLE))
canvas.drawRect(xBrowse1, ucscPaddingTop, xBrowse2, (ucscPaddingTop+self.UCSC_BAND_HEIGHT), edgeColor=self.CLICKABLE_UCSC_REGION_COLOR, fillColor=self.CLICKABLE_UCSC_REGION_COLOR)
canvas.drawLine(xBrowse1, ucscPaddingTop, xBrowse1, (ucscPaddingTop+self.UCSC_BAND_HEIGHT), color=self.CLICKABLE_UCSC_REGION_OUTLINE_COLOR)
-
+
ENSEMBL_COORDS = "%d, %d, %d, %d" %(xBrowse1, ensemblPaddingTop, xBrowse2, (ensemblPaddingTop+self.ENSEMBL_BAND_HEIGHT))
if self.dataset.group.species == "mouse":
ENSEMBL_HREF = "http://www.ensembl.org/Mus_musculus/contigview?highlight=&chr=%s&vc_start=%d&vc_end=%d&x=35&y=12" % (self.selectedChr, max(0, calBase-flankingWidthInBases), calBase+flankingWidthInBases)
@@ -1767,7 +1756,7 @@ class MarkerRegression(object):
ChrAInfo = []
preLpos = -1
distinctCount = 0.0
-
+
#if len(self.genotype) > 1:
if self.selectedChr == -1: #ZS: If viewing full genome/all chromosomes
for i, _chr in enumerate(self.genotype):
@@ -1810,7 +1799,7 @@ class MarkerRegression(object):
offsetA = -stepA
lineColor = pid.lightblue
startPosX = xLeftOffset
-
+
for j, ChrInfo in enumerate(ChrAInfo):
preLpos = -1
for i, item in enumerate(ChrInfo):
@@ -1878,7 +1867,7 @@ class MarkerRegression(object):
# lodm = self.LODFACTOR
# else:
# lodm = 1.0
-
+
#ZS: This is a mess, but I don't know a better way to account for different mapping methods returning results in different formats + the option to change between LRS and LOD
if self.lrsMax <= 0: #sliding scale
if "lrs_value" in self.qtlresults[0]:
@@ -1891,7 +1880,7 @@ class MarkerRegression(object):
else:
if self.permChecked and self.nperm > 0 and not self.multipleInterval:
self.significant = min(self.significant, webqtlConfig.MAXLRS)
- self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS)
+ self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS)
else:
pass
else:
@@ -1904,10 +1893,10 @@ class MarkerRegression(object):
else:
if self.permChecked and self.nperm > 0 and not self.multipleInterval:
self.significant = min(self.significant, webqtlConfig.MAXLRS)
- self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS)
+ self.suggestive = min(self.suggestive, webqtlConfig.MAXLRS)
else:
pass
-
+
if self.permChecked and self.nperm > 0 and not self.multipleInterval:
LRS_LOD_Max = max(self.significant, LRS_LOD_Max)
@@ -1924,7 +1913,7 @@ class MarkerRegression(object):
LRSScale = 2.5
else:
LRSScale = 1.0
-
+
LRSAxisList = Plot.frange(LRSScale, LRS_LOD_Max, LRSScale)
#make sure the user's value appears on the y-axis
#update by NL 6-21-2011: round the LOD value to 100 when LRS_LOD_Max is equal to 460
@@ -1954,7 +1943,7 @@ class MarkerRegression(object):
#"Significant" and "Suggestive" Drawing Routine
# ======= Draw the thick lines for "Significant" and "Suggestive" ===== (crowell: I tried to make the SNPs draw over these lines, but piddle wouldn't have it...)
-
+
#ZS: I don't know if what I did here with this inner function is clever or overly complicated, but it's the only way I could think of to avoid duplicating the code inside this function
def add_suggestive_significant_lines_and_legend(start_pos_x, chr_length_dist):
rightEdge = int(start_pos_x + chr_length_dist*plotXScale - self.SUGGESTIVE_WIDTH/1.5)
@@ -1977,13 +1966,13 @@ class MarkerRegression(object):
start_pos_x += (chr_length_dist+self.GraphInterval)*plotXScale
return start_pos_x
-
+
for i, _chr in enumerate(self.genotype):
if self.selectedChr != -1:
if _chr.name == self.ChrList[self.selectedChr][0]:
startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[0])
break
- else:
+ else:
continue
else:
startPosX = add_suggestive_significant_lines_and_legend(startPosX, self.ChrLengthDistList[i])
@@ -1998,7 +1987,7 @@ class MarkerRegression(object):
#else:
# dominanceMax = -1
lrsEdgeWidth = 2
-
+
if zoom == 2:
lrsEdgeWidth = 2 * lrsEdgeWidth
@@ -2019,7 +2008,7 @@ class MarkerRegression(object):
if qtlresult['chr'] != previous_chr and self.selectedChr == -1:
if self.manhattan_plot != True:
canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
-
+
if not self.multipleInterval and self.additiveChecked:
plusColor = self.ADDITIVE_COLOR_POSITIVE
minusColor = self.ADDITIVE_COLOR_NEGATIVE
@@ -2049,7 +2038,7 @@ class MarkerRegression(object):
canvas.drawLine(Xc0, Yc0, Xc, Yc, color=plusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
else:
canvas.drawLine(Xc0, yZero - (Yc0-yZero), Xc, yZero - (Yc-yZero), color=minusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
-
+
LRSCoordXY = []
AdditiveCoordXY = []
previous_chr = qtlresult['chr']
@@ -2062,7 +2051,7 @@ class MarkerRegression(object):
#startPosX += (self.ChrLengthDistList[j]+self.GraphInterval)*plotXScale
- #for j, _chr in enumerate(self.genotype):
+ #for j, _chr in enumerate(self.genotype):
#ZS: This is beause the chromosome value stored in qtlresult['chr'] can be (for example) either X or 20 depending upon the mapping method/scale used
if self.plotScale == "physic":
this_chr = str(self.ChrList[self.selectedChr][0])
@@ -2126,7 +2115,7 @@ class MarkerRegression(object):
if self.manhattan_plot != True:
canvas.drawPolygon(LRSCoordXY,edgeColor=thisLRSColor,closed=0, edgeWidth=lrsEdgeWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
-
+
if not self.multipleInterval and self.additiveChecked:
plusColor = self.ADDITIVE_COLOR_POSITIVE
minusColor = self.ADDITIVE_COLOR_NEGATIVE
@@ -2156,7 +2145,7 @@ class MarkerRegression(object):
canvas.drawLine(Xc0, Yc0, Xc, Yc, color=plusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
else:
canvas.drawLine(Xc0, yZero - (Yc0-yZero), Xc, yZero - (Yc-yZero), color=minusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
-
+
if not self.multipleInterval and INTERCROSS and self.dominanceChecked:
plusColor = self.DOMINANCE_COLOR_POSITIVE
minusColor = self.DOMINANCE_COLOR_NEGATIVE
@@ -2186,7 +2175,7 @@ class MarkerRegression(object):
canvas.drawLine(Xc0, Yc0, Xc, Yc, color=plusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
else:
canvas.drawLine(Xc0, yZero - (Yc0-yZero), Xc, yZero - (Yc-yZero), color=minusColor, width=lineWidth, clipX=(xLeftOffset, xLeftOffset + plotWidth))
-
+
###draw additive scale
if not self.multipleInterval and self.additiveChecked:
@@ -2222,7 +2211,7 @@ class MarkerRegression(object):
if zoom == 2:
fontZoom = 1.5
yTopOffset += 30
-
+
#calculate plot scale
if self.plotScale != 'physic':
self.ChrLengthDistList = self.ChrLengthCMList
@@ -2589,13 +2578,13 @@ class MarkerRegression(object):
perm_output = [value/4.16 for value in self.perm_output]
else:
perm_output = self.perm_output
-
+
Plot.plotBar(myCanvas, perm_output, XLabel=self.LRS_LOD, YLabel='Frequency', title=' Histogram of Permutation Test')
filename= webqtlUtil.genRandStr("Reg_")
myCanvas.save(webqtlConfig.IMGDIR+filename, format='gif')
-
+
return filename
-
+
# img=HT.Image('/image/'+filename+'.gif',border=0,alt='Histogram of Permutation Test')
# self.suggestive = self.perm_output[int(self.nperm*0.37-1)]
@@ -2609,9 +2598,9 @@ class MarkerRegression(object):
# permutation.append(HT.TR(HT.TD(img)),
# HT.TR(HT.TD('')),
# HT.TR(HT.TD('Total of %d permutations'%self.nperm)))
-
+
# return permutation
-
+
def permutationTextFile(self):
filename= webqtlUtil.genRandStr("Reg_")
fpText = open('%s.txt' % (webqtlConfig.TMPDIR+filename), 'wb')
@@ -2624,12 +2613,12 @@ class MarkerRegression(object):
'&nbsp;&nbsp;&nbsp;&nbsp;Significant LRS =%3.2f\n'%self.significant,
HT.BR(),
'&nbsp;&nbsp;&nbsp;&nbsp;Highly Significant LRS =%3.2f\n' % self.highlysignificant)
-
+
for lrs_value in self.perm_output:
fpText.write(str(lrs_value) + "\n")
-
+
textUrl = HT.Href(text = 'Download Permutation Results', url= '/tmp/'+filename+'.txt', target = "_blank", Class='fs12 fwn')
-
+
return textUrl
def geneTable(self, geneCol, refGene=None):
@@ -3039,4 +3028,4 @@ class MarkerRegression(object):
sortby = ("", "")
- return sortby \ No newline at end of file
+ return sortby
diff --git a/wqflask/wqflask/search_results.py b/wqflask/wqflask/search_results.py
index a57bfffe..9941a4d3 100755
--- a/wqflask/wqflask/search_results.py
+++ b/wqflask/wqflask/search_results.py
@@ -24,9 +24,8 @@ from flask import Flask, g
from MySQLdb import escape_string as escape
# Instead of importing HT we're going to build a class below until we can eliminate it
-from htmlgen import HTMLgen2 as HT
+# from htmlgen import HTMLgen2 as HT
-from base import webqtlConfig
from utility.benchmark import Bench
from base.data_set import create_dataset
from base.trait import GeneralTrait
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 2d4c952a..79f1c867 100755
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -16,7 +16,6 @@ from base import webqtlConfig
from base import webqtlCaseData
from wqflask.show_trait.SampleList import SampleList
from utility import webqtlUtil, Plot, Bunch, helper_functions
-from utility.tools import pylmm_command, plink_command
from base.trait import GeneralTrait
from base import data_set
from dbFunction import webqtlDatabaseFunction
@@ -24,8 +23,8 @@ from basicStatistics import BasicStatisticsFunctions
from pprint import pformat as pf
-PYLMM_PATH,PYLMM_COMMAND = pylmm_command()
-PLINK_PATH,PLINK_COMMAND = plink_command()
+from utility.tools import flat_files
+MAPPING_PATH = flat_files("mapping")
###############################################
#
@@ -34,8 +33,6 @@ PLINK_PATH,PLINK_COMMAND = plink_command()
#
##############################################
-
-
class ShowTrait(object):
def __init__(self, kw):
@@ -162,14 +159,14 @@ class ShowTrait(object):
def get_mapping_methods(self):
'''Only display mapping methods when the dataset group's genotype file exists'''
def check_plink_gemma():
- if (os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".bed") and
- os.path.isfile(PLINK_PATH+"/"+self.dataset.group.name+".map")):
+ if (os.path.isfile(MAPPING_PATH+"/"+self.dataset.group.name+".bed") and
+ os.path.isfile(MAPPING_PATH+"/"+self.dataset.group.name+".map")):
return True
else:
return False
def check_pylmm_rqtl():
- if os.path.isfile(webqtlConfig.GENODIR+self.dataset.group.name+".geno") and (os.path.getsize(webqtlConfig.NEWGENODIR+self.dataset.group.name+".json") > 0):
+ if os.path.isfile(webqtlConfig.GENODIR+self.dataset.group.name+".geno") and (os.path.getsize(webqtlConfig.JSON_GENODIR+self.dataset.group.name+".json") > 0):
return True
else:
return False
diff --git a/wqflask/wqflask/static/css b/wqflask/wqflask/static/css
deleted file mode 120000
index 9d8c2f68..00000000
--- a/wqflask/wqflask/static/css
+++ /dev/null
@@ -1 +0,0 @@
-../../../web/css \ No newline at end of file
diff --git a/wqflask/wqflask/static/dbdoc/TODO.md b/wqflask/wqflask/static/dbdoc/TODO.md
deleted file mode 100644
index c0a8bab7..00000000
--- a/wqflask/wqflask/static/dbdoc/TODO.md
+++ /dev/null
@@ -1 +0,0 @@
-TODO: Add all database documentation into this folder
diff --git a/wqflask/wqflask/static/images b/wqflask/wqflask/static/images
deleted file mode 120000
index 12f0f8b5..00000000
--- a/wqflask/wqflask/static/images
+++ /dev/null
@@ -1 +0,0 @@
-../../../web/images \ No newline at end of file
diff --git a/wqflask/wqflask/static/javascript b/wqflask/wqflask/static/javascript
deleted file mode 120000
index 5f58faf4..00000000
--- a/wqflask/wqflask/static/javascript
+++ /dev/null
@@ -1 +0,0 @@
-../../../web/javascript \ No newline at end of file
diff --git a/wqflask/wqflask/static/new/css/corr_matrix.css b/wqflask/wqflask/static/new/css/corr_matrix.css
index 495ca28c..cd2b0a80 100755
--- a/wqflask/wqflask/static/new/css/corr_matrix.css
+++ b/wqflask/wqflask/static/new/css/corr_matrix.css
@@ -3,7 +3,7 @@
-moz-transform: rotate(-90deg);
-ms-transform: rotate(-90deg);
-o-transform: rotate(-90deg);
- trasnform: rotate(-90deg);
+ transform: rotate(-90deg);
color: #000
font-size: 22px;
height: 50px;
diff --git a/wqflask/wqflask/templates/collections/view.html b/wqflask/wqflask/templates/collections/view.html
index f92d9984..c1563b9c 100755
--- a/wqflask/wqflask/templates/collections/view.html
+++ b/wqflask/wqflask/templates/collections/view.html
@@ -55,6 +55,18 @@
<input type="submit" class="btn btn-primary" value="WGCNA Analysis" />
</div>
</form>
+ <form action="/ctl_setup" method="post">
+ {% if uc %}
+ <input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" />
+ {% endif %}
+ <input type="hidden" name="trait_list" id="trait_list" value= "
+ {% for this_trait in trait_obs %}
+ {{ this_trait.name }}:{{ this_trait.dataset.name }},
+ {% endfor %}" >
+ <div class="col-xs-2 controls">
+ <input type="submit" class="btn btn-primary" value="CTL Analysis" />
+ </div>
+ </form>
<form action="/heatmap" method="post">
{% if uc %}
<input type="hidden" name="uc_id" id="uc_id" value="{{ uc.id }}" />
diff --git a/wqflask/wqflask/templates/ctl_results.html b/wqflask/wqflask/templates/ctl_results.html
new file mode 100644
index 00000000..a5cb1c08
--- /dev/null
+++ b/wqflask/wqflask/templates/ctl_results.html
@@ -0,0 +1,47 @@
+{% extends "base.html" %}
+{% block title %}CTL results{% endblock %}
+
+{% block content %} <!-- Start of body -->
+<div class="container">
+ <h1>CTL Results</h1>
+ {{(request.form['trait_list'].split(',')|length -1)}} phenotypes as input<br>
+ <h3>Network Figure</h3>
+ <a href="/tmp/{{ results['imgurl1'] }}">
+ <img alt="Embedded Image" src="data:image/png;base64,
+ {% for elem in results['imgdata1'] -%}
+ {% print("%c"|format(elem)) %}
+ {%- endfor %}
+ " /></a>
+ <h3>CTL/QTL Plots for individual traits</h3>
+ {% for r in range(2, (request.form['trait_list'].split(',')|length +1)) %}
+ <a href="/tmp/{{ results['imgurl' + r|string] }}">
+ <img width=100 height=100 alt="Embedded Image" src="data:image/png;base64,
+ {% for elem in results['imgdata' + r|string] -%}
+ {% print("%c"|format(elem)) %}
+ {%- endfor %}
+ " /></a>
+ {% endfor %}
+ <h3>Tabular results</h3>
+ <table width="80%">
+ <tr><th>trait</th><th>marker</th><th>trait</th><th>LOD</th><th>dCor</th></tr>
+ significant CTL:<br>
+ {% for r in range(results['ctlresult'][0]|length) %}
+ <tr>
+ {% for c in range(results['ctlresult']|length) %}
+ <td>
+ {% if c > 2 %}
+ {{results['ctlresult'][c][r]|float|round(2)}}
+ {% else %}
+ {{results['ctlresult'][c][r]}}
+ {% endif %}
+ </td>
+ {% endfor %}
+ </tr>
+ {% endfor %}
+ </table>
+
+
+
+
+</div>
+{% endblock %}
diff --git a/wqflask/wqflask/templates/ctl_setup.html b/wqflask/wqflask/templates/ctl_setup.html
new file mode 100644
index 00000000..9c0d7bea
--- /dev/null
+++ b/wqflask/wqflask/templates/ctl_setup.html
@@ -0,0 +1,65 @@
+{% extends "base.html" %}
+{% block title %}WCGNA analysis{% endblock %}
+
+{% block content %} <!-- Start of body -->
+<div class="container">
+ <h1>CTL analysis parameters</h1>
+ {{(request.form['trait_list'].split(',')|length -1)}} phenotypes as input
+
+<form action="/ctl_results" method="post" class="form-horizontal">
+ <input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}">
+
+ <div class="dropdown">
+ <label for="Strategy">Strategy</label>
+ <div class="col-sm-10">
+ <select name="strategy" id="strategy">
+ <option value="Exact">Exact</option>
+ <option value="Full">Full</option>
+ <option value="Pairwise">Pairwise</option>
+ </select>
+ </div>
+ </div>
+
+ <div class="dropdown">
+ <label for="Permutations">Number of permutation (Used when strategy is Full or Pairwise)</label>
+ <div class="col-sm-10">
+ <select name="nperm" id="nperm">
+ <option value="100">100</option>
+ <option value="1000" selected="selected">1000</option>
+ <option value="10000">10000</option>
+ </select>
+ </div>
+ </div>
+
+ <div class="dropdown">
+ <label for="Coefficient">Type of correlation coefficient</label>
+ <div class="col-sm-10">
+ <select name="parametric" id="parametric">
+ <option value="False">Spearman</option>
+ <option value="True">Pearson</option>
+ </select>
+ </div>
+ </div>
+
+
+ <div class="dropdown">
+ <label for="Significance">Significance level</label>
+ <div class="col-sm-10">
+ <select name="significance" id="significance">
+ <option value="0.1">0.1</option>
+ <option value="0.05" selected="selected">0.05</option>
+ <option value="0.001">0.001</option>
+ </select>
+ </div>
+ </div>
+ <br>
+ <div class="form-group">
+ <div class="col-sm-10">
+ <input type="submit" class="btn btn-primary" value="Run CTL using these settings" />
+ </div>
+ </div>
+
+</form>
+
+</div>
+{% endblock %}
diff --git a/wqflask/wqflask/templates/marker_regression_gn1.html b/wqflask/wqflask/templates/marker_regression_gn1.html
index 7f269048..07f764ca 100644
--- a/wqflask/wqflask/templates/marker_regression_gn1.html
+++ b/wqflask/wqflask/templates/marker_regression_gn1.html
@@ -143,7 +143,7 @@
<div class="tab-pane active" id="gn1_map">
<div class="qtlcharts">
{{ gifmap|safe }}
- <img src="/static/output/{{ filename }}.jpeg" usemap="#WebQTLImageMap">
+ <img src="/generated/{{ filename }}.jpeg" usemap="#WebQTLImageMap">
{% if additiveChecked|upper == "ON" %}
<br>
<span style="white-space: nowrap;">A positive additive coefficient (green line) indicates that {{ dataset.group.parlist[1] }} alleles increase trait values. In contrast, a negative additive coefficient (orange line) indicates that {{ dataset.group.parlist[0] }} alleles increase trait values.</span>
diff --git a/wqflask/wqflask/templates/wgcna_setup.html b/wqflask/wqflask/templates/wgcna_setup.html
index 8ab8c4a0..b4a5730d 100644
--- a/wqflask/wqflask/templates/wgcna_setup.html
+++ b/wqflask/wqflask/templates/wgcna_setup.html
@@ -17,36 +17,36 @@
{% else %}
<form action="/wgcna_results" method="post" class="form-horizontal">
<input type="hidden" name="trait_list" id="trait_list" value= "{{request.form['trait_list']}}">
- <div class="form-group">
+ <div class="form-group">
<label for="SoftThresholds"> Soft threshold: </label>
<div class="col-sm-10">
<input type="text" class="form-inline" name="SoftThresholds" id="SoftThresholds" value="1,2,3,4,5,6,7,8,9">
</div>
</div>
- <div class="form-group">
+ <div class="form-group">
<label for="MinModuleSize"> Minimum module size: </label>
<div class="col-sm-10">
<input type="text" class="form-inline" name="MinModuleSize" id="MinModuleSize" value="30">
</div>
</div>
- <div class="form-group">
+ <div class="form-group">
<label for="TOMtype"> TOMtype: </label>
<div class="col-sm-10">
<input type="text" class="form-inline" name="TOMtype" id="TOMtype" value="unsigned">
</div>
- </div>
- <div class="form-group">
+ </div>
+ <div class="form-group">
<label for="mergeCutHeight"> mergeCutHeight: </label>
<div class="col-sm-10">
<input type="text" class="form-inline" name="mergeCutHeight" id="mergeCutHeight" value="0.25">
</div>
- </div>
- <div class="form-group">
+ </div>
+ <div class="form-group">
<div class="col-sm-10">
<input type="submit" class="btn btn-primary" value="Run WGCNA using these settings" />
</div>
- </div>
- </form>
+ </div>
+ </form>
{% endif %}
</div>
{% endblock %}
diff --git a/wqflask/wqflask/views.py b/wqflask/wqflask/views.py
index 3bf64a18..87ba8b32 100644
--- a/wqflask/wqflask/views.py
+++ b/wqflask/wqflask/views.py
@@ -30,7 +30,7 @@ import sqlalchemy
from wqflask import app
from flask import (render_template, request, make_response, Response,
- Flask, g, config, jsonify, redirect, url_for)
+ Flask, g, config, jsonify, redirect, url_for, send_from_directory)
from wqflask import search_results
from wqflask import gsearch
@@ -48,10 +48,13 @@ from wqflask.correlation_matrix import show_corr_matrix
from wqflask.correlation import corr_scatter_plot
from wqflask.wgcna import wgcna_analysis
+from wqflask.ctl import ctl_analysis
from utility import temp_data
+from utility.tools import TEMPDIR
from base import webqtlFormData
+from base.webqtlConfig import GENERATED_IMAGE_DIR
from utility.benchmark import Bench
from pprint import pformat as pf
@@ -96,7 +99,7 @@ def tmp_page(img_path):
print("img_path:", img_path)
initial_start_vars = request.form
print("initial_start_vars:", initial_start_vars)
- imgfile = open(webqtlConfig.TMPDIR + img_path, 'rb')
+ imgfile = open(GENERATED_IMAGE_DIR + img_path, 'rb')
imgdata = imgfile.read()
imgB64 = imgdata.encode("base64")
bytesarray = array.array('B', imgB64)
@@ -172,6 +175,10 @@ def docedit():
doc = docs.Docs(request.args['entry'])
return render_template("docedit.html", **doc.__dict__)
+@app.route('/generated/<filename>')
+def generated_file(filename):
+ return send_from_directory(GENERATED_IMAGE_DIR,filename)
+
@app.route("/help")
def help():
doc = docs.Docs("help")
@@ -190,6 +197,18 @@ def wcgna_results():
result = wgcna.process_results(wgcnaA) # After the analysis is finished store the result
return render_template("wgcna_results.html", **result) # Display them using the template
+@app.route("/ctl_setup", methods=('POST',))
+def ctl_setup():
+ print("In ctl, request.form is:", request.form) # We are going to get additional user input for the analysis
+ return render_template("ctl_setup.html", **request.form) # Display them using the template
+
+@app.route("/ctl_results", methods=('POST',))
+def ctl_results():
+ print("In ctl, request.form is:", request.form)
+ ctl = ctl_analysis.CTL() # Start R, load the package and pointers and create the analysis
+ ctlA = ctl.run_analysis(request.form) # Start the analysis, a ctlA object should be a separate long running thread
+ result = ctl.process_results(ctlA) # After the analysis is finished store the result
+ return render_template("ctl_results.html", **result) # Display them using the template
@app.route("/news")
def news_route():
@@ -423,7 +442,7 @@ def marker_regression_page():
print("img_path:", img_path)
initial_start_vars = request.form
print("initial_start_vars:", initial_start_vars)
- imgfile = open('/home/zas1024/tmp/' + img_path, 'rb')
+ imgfile = open(TEMPDIR + '/' + img_path, 'rb')
imgdata = imgfile.read()
imgB64 = imgdata.encode("base64")
bytesarray = array.array('B', imgB64)
@@ -452,7 +471,7 @@ def export_pdf():
svg_xml = request.form.get("data", "Invalid data")
print("svg_xml:", svg_xml)
filename = request.form.get("filename", "interval_map_pdf")
- filepath = "/home/zas1024/gene/wqflask/output/"+filename
+ filepath = GENERATED_IMAGE_DIR+filename
pdf_file = cairosvg.svg2pdf(bytestring=svg_xml)
response = Response(pdf_file, mimetype="application/pdf")
response.headers["Content-Disposition"] = "attachment; filename=%s"%filename
diff --git a/wqflask/wqflask/wgcna/wgcna_analysis.py b/wqflask/wqflask/wgcna/wgcna_analysis.py
index 9e9f41bc..6901712b 100644
--- a/wqflask/wqflask/wgcna/wgcna_analysis.py
+++ b/wqflask/wqflask/wgcna/wgcna_analysis.py
@@ -6,7 +6,7 @@ import scipy as sp # SciPy
import rpy2.robjects as ro # R Objects
import rpy2.rinterface as ri
-from base import webqtlConfig # For paths and stuff
+from base.webqtlConfig import GENERATED_IMAGE_DIR
from utility import webqtlUtil # Random number for the image
import base64
@@ -127,7 +127,7 @@ class WGCNA(object):
# The iconic WCGNA plot of the modules in the hanging tree
self.results['imgurl'] = webqtlUtil.genRandStr("WGCNAoutput_") + ".png"
- self.results['imgloc'] = webqtlConfig.TMPDIR + self.results['imgurl']
+ self.results['imgloc'] = GENERATED_IMAGE_DIR + self.results['imgurl']
r_png(self.results['imgloc'], width=1000, height=600)
mergedColors = self.r_labels2colors(network[1])
self.r_plotDendroAndColors(network[5][0], mergedColors, "Module colors", dendroLabels = False, hang = 0.03, addGuide = True, guideHang = 0.05)