about summary refs log tree commit diff
path: root/doc
diff options
context:
space:
mode:
authorzsloan2016-05-18 20:06:41 +0000
committerzsloan2016-05-18 20:06:41 +0000
commitf8c89f7c24cbfcffdafd12ef2087f4de598ed4bd (patch)
treeca233c2f08151377de161a616e69031d557ff808 /doc
parente36ffce0d972334b07ee91b817fc3f30ed3598c4 (diff)
parenta8945cc625f12e9cf733f469426bf0e4b8e83647 (diff)
downloadgenenetwork2-f8c89f7c24cbfcffdafd12ef2087f4de598ed4bd.tar.gz
Merge branch 'staging' of https://github.com/genenetwork/genenetwork2
Diffstat (limited to 'doc')
-rw-r--r--doc/GUIX-Reproducible-from-source.org21
-rw-r--r--doc/GUIX-archive.org106
-rw-r--r--doc/README.org678
-rw-r--r--doc/database.org710
-rw-r--r--doc/new_variable_names.txt6
-rw-r--r--doc/notes_DA.txt10
-rw-r--r--doc/old/gn_installation_notes.txt (renamed from doc/gn_installation_notes.txt)4
-rw-r--r--doc/old/notes.txt (renamed from doc/notes.txt)0
-rw-r--r--doc/requirements.txt36
-rw-r--r--doc/todo.txt2
10 files changed, 1502 insertions, 71 deletions
diff --git a/doc/GUIX-Reproducible-from-source.org b/doc/GUIX-Reproducible-from-source.org
index b88eb9e8..4399ea26 100644
--- a/doc/GUIX-Reproducible-from-source.org
+++ b/doc/GUIX-Reproducible-from-source.org
@@ -4,6 +4,7 @@
  - [[#introduction][Introduction]]
  - [[#binary-deployment][Binary deployment]]
  - [[#from-source-deployment][From source deployment]]
+ - [[#create-archive][Create archive]]
 
 * Introduction
 
@@ -31,5 +32,23 @@ Next build guix (and run) following the instructions in [[https://github.com/pjo
 
 Once that is done we can add the guix-bioinformatics path with
 
-: env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -A slurm
+: env GUIX_PACKAGE_PATH=../guix-bioinformatics command
 
+So
+
+#+begin_src sh   :lang bash
+#+begin_src sh   :lang bash
+gn-stable-guix$ env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -A genenetwork
+genenetwork1    1.0-d622c803b   out     ../guix-bioinformatics/gn/packages/bioinformatics.scm:163:2
+genenetwork2    2.0-9e9475053   out     ../guix-bioinformatics/gn/packages/bioinformatics.scm:215:2
+#+end_src sh   :lang bash
+
+Install with
+
+#+begin_src sh   :lang bash
+gn-stable-guix$ env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix package -i genenetwork2
+#+end_src sh   :lang bash
+
+* Create archive
+
+: env GUIX_PACKAGE_PATH=../../genenetwork/guix-bioinformatics/ ./pre-inst-env guix archive --export -r genenetwork2 > guix_gn2-2.0-9e9475053.nar
diff --git a/doc/GUIX-archive.org b/doc/GUIX-archive.org
new file mode 100644
index 00000000..67ab5cd0
--- /dev/null
+++ b/doc/GUIX-archive.org
@@ -0,0 +1,106 @@
+* Binary deployment
+
+Note binary deployment is not working pending a few improvements
+to GNU Guix. See source deployment instead.
+
+** Install Guix using a tar ball
+
+GN can be deployed either as a binary tarball or as a GNU Guix
+package. First install GNU Guix following the instructions of the
+[[https://www.gnu.org/software/guix/manual/html_node/Binary-Installation.html#Binary-Installation][binary installation]] using a tar ball from [[https://www.gnu.org/software/guix/download/][here]].
+
+With guix-daemon running you should be able to install the hello
+package:
+
+: guix package -i hello
+
+** Fix locale
+
+You may want to set the locale environment variables:
+
+#+begin_src sh   :lang bash
+export GUIX_LOCPATH=$HOME/.guix-profile/lib/locale
+export LC_ALL=en_US.utf8
+#+end_src sh   :lang bash
+
+** Authorize our archives
+
+Next add our archive key to guix (as root):
+
+#+begin_src scheme
+echo "(public-key   
+ (ecc 
+  (curve Ed25519)
+  (q #E9A95686D8437186302E07C7AB9BF3913F435026C2D389AF27D9C66FD6EBB649#)
+  )
+ )
+"|guix archive --authorize
+#+end_src scheme
+
+If you have trouble finding a suitable guix binary, try
+
+: ls /gnu/store/*guix-*/bin/guix
+
+and you should be able to use this directly, e.g.
+
+: alias guix=/gnu/store/632msbms2yaldfnlrb5lbnlnmn9yjisw-guix-0.9.0/bin/guix
+: guix --version
+
+** Download and install the GN2 archive
+
+Find the archive on 
+
+  http://files.genenetwork.org/software/
+
+download and install with
+
+#+begin_src bash
+guix archive --import < genenetwork2-data-hash.nar
+#+end_src bash
+
+and you should see a list of packages installing, e.g.
+
+#+begin_src bash
+importing path `/gnu/store/l1zs2drn3zdzl5ysjcmhibcpa35p9zfc-python2-mysqlclient-1.3.7'
+importing path `/gnu/store/n7kfg4knibvblggy8ci2liscl7vz5wkg-python2-parallel-1.6.4'
+importing path `/gnu/store/qvv16qwlq59gp5d07lwbf5n8ndsi3il3-python2-sqlalchemy-1.0.11'
+importing path `/gnu/store/qw872mbmr9ir0a9drv9xw9pvjk05ywwy-python2-xlsxwriter-0.8.4'
+importing path `/gnu/store/wc112m1xfy3p08v14bdzay2ki2rirdsm-pylmm-gn2-1.0-3c6d1cac8'
+importing path `/gnu/store/zfkcy17c2ks3cd9ks14irdabqvmlfpyn-python2-flask-sqlalchemy-2.1'
+importing path `/gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff4'
+(etc.)
+#+end_src bash
+
+The following packages need to be added and the R path set
+
+: export R_LIBS_SITE="/home/wrk/.guix-profile/site-library/"
+: guix package -i /gnu/store/w0dqg9dshq53j8xhcnqgvnvms2s6y5k5-r-wgcna-1.49-425bc170cc0873ddbd414675ac40f6d4d724c7cb
+: guix package -i /gnu/store/k60bdlm0v7xic88j2z5c1jb1jvc371mn-r-qtl-1.38-4
+
+You can add the last one to your profile
+
+: guix package -i /gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff4
+: export PATH=~/.guix-profile/bin:$PATH
+: genenetwork2
+
+ or run it directly with
+
+: /gnu/store/cgcjdiz1qylbc372gc3nda3372ihkpqb-genenetwork2-2.0-a8fcff4/bin/genenetwork2
+
+
+
+** Other
+
+Update guix with a 'guix pull' and make guix visible in the path.
+More information exists also in my [[https://github.com/pjotrp/guix-notes/blob/master/INSTALL.org][guix-notes]].
+
+With guix running you should be able to install python, for example.
+
+: guix package -i python2
+
+This will make python appear in $HOME/.guix-profile/bin/python. Suggested
+environment settings can be seen with
+
+: guix package --search-paths
+
+
diff --git a/doc/README.org b/doc/README.org
index f6ab6a52..345341e1 100644
--- a/doc/README.org
+++ b/doc/README.org
@@ -1,28 +1,678 @@
-#+TITLE: Installing GeneNetwork services with GNU Guix
+
+#+TITLE: Installing GeneNetwork services
 
 * Table of Contents                                                     :TOC:
  - [[#introduction][Introduction]]
- - [[#binary-deployment][Binary deployment]]
- - [[#from-source-deployment][From source deployment]]
+ - [[#source-deployment][Source deployment]]
+   - [[#install-guix][Install guix]]
+   - [[#checkout-the-git-repositories][Checkout the git repositories]]
+   - [[#update-guix][Update guix]]
+   - [[#install-gn2][Install GN2]]
+   - [[#run-gn2][Run GN2]]
+   - [[#run-mysql-server][Run MySQL server]]
+   - [[#run-your-own-copy-of-gn2][Run your own copy of GN2]]
+   - [[#set-up-nginx-port-forwarding][Set up nginx port forwarding]]
+ - [[#source-deployment-and-other-information-on-reproducibility][Source deployment and other information on reproducibility]]
+ - [[#trouble-shooting][Trouble shooting]]
+   - [[#importerror-no-module-named-jinja2][ImportError: No module named jinja2]]
+   - [[#error-can-not-find-directory-homegn2_data][ERROR: can not find directory $HOME/gn2_data]]
+   - [[#cant-run-a-module][Can't run a module]]
 
 * Introduction
 
-Large system deployments tend to get very complex. In this document we
-explain the GeneNetwork deployment system which is based on GNU Guix
-(see Pjotr's [[https://github.com/pjotrp/guix-notes/blob/master/README.md][Guix-notes]]).
+Large system deployments can get very complex. In this document we
+explain the GeneNetwork version 2 (GN2) reproducible deployment system
+which is based on GNU Guix (see also Pjotr's [[https://github.com/pjotrp/guix-notes/blob/master/README.md][Guix-notes]]). The Guix
+system can be used to install GN with all its files and dependencies.
+
+The official installation path is from a checked out version of the
+main Guix package tree and that of the Genenetwork package
+tree. Current supported versions can be found as the SHA values of
+'gn-latest' branches of [[https://github.com/genenetwork/guix-bioinformatics/tree/gn-latest][Guix bioinformatics]] and [[https://github.com/genenetwork/guix/tree/gn-latest][GNU Guix main]].
+
+* Source deployment
+** Install guix
 
-* Binary deployment
+Deploying from source is also straightforward. Install GNU Guix using
+a binary tar ball as described [[https://github.com/pjotrp/guix-notes][here]].
 
-NYA
+If it works you should be able to install a package with
 
-* From source deployment
+: guix package -i hello
 
-GNU Guix allows for [[https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org][reproducible deployment]] based on a checked out
-Guix repository - use gn-stable for that:
+** Checkout the git repositories
 
-#+begin_src sh   :lang bash
+Check out the two relevant guix and guix-bioinformatics git
+repositories:
+
+#+begin_src bash
+cd ~
 mkdir genenetwork
 cd genenetwork
-git checkout https://github.com/genenetwork/guix.git gn-stable-guix
-git checkout https://github.com/genenetwork/guix-bioinformatics.git
+git clone --branch gn-latest https://github.com/genenetwork/guix-bioinformatics
+git clone --branch gn-latest --recursive https://github.com/genenetwork/guix guix-gn-latest
+cd guix-gn-latest
+#+end_src bash
+
+** Update guix
+
+At some point you may decide to create, install and run a recent
+version of the guix-daemon by compiling the guix repository. Follow
+[[https://github.com/pjotrp/guix-notes/blob/master/INSTALL.org#building-gnu-guix-from-source-using-guix][these]] steps carefully.
+
+** Install GN2
+
+#+begin_src bash
+env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env \
+  guix package -i genenetwork2 --fallback 
+#+end_src bash
+
+Note that you can use the genenetwork.org guix substitute caching
+server at http://guix.genenetwork.org:8080 (which speeds up installs
+significantly because all packages are pre-built). Here is an IRC session
+where we installed GN2 from scratch using GNU Guix and a download
+of the test database:
+
+#+begin_src
+<pjotrp> time to get binary install sorted :)  [07:03]
+<pjotrp> Guix is designed for distributed installation servers
+<pjotrp> we have one on guix.genenetwork.org
+<pjotrp> it contains all the prebuild packages
+<pjotrp> for GN
+<user01> okay  [07:04]
+<pjotrp> let's step back however  [07:05]
+<pjotrp> I presume the environment is set with all guix package --search-paths
+<pjotrp> right?
+<user01> yep
+<user01> set to the ones in ~/.guix-profile/
+<pjotrp> good, and you are in gn-latest-guix repo  [07:06]
+<user01> yep  [07:07]
+<pjotrp> git log shows 
+
+Author: David Thompson <dthompson2@worcester.edu>
+Date:   Sun Mar 27 21:20:19 2016 -0400
+
+<user01> yes
+<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix
+         package -A genenetwork2  [07:08]
+<pjotrp> shows
+
+genenetwork2    2.0-a8fcff4     out ../guix-bioinformatics/gn/packages/genenetwork.scm:144:2
+genenetwork2-database-small     1.0     out ../guix-bioinformatics/gn/packages/genenetwork.scm:270:4
+genenetwork2-files-small        1.0     out ../guix-bioinformatics/gn/packages/genenetwork.scm:228:4
+
+<user01> yeah  [07:09]
+<pjotrp> OK, we are in sync. This means we should be able to install the exact
+         same software
+<pjotrp> I need to start up my guix daemon - I usually run it in a screen
+<pjotrp> screen -S guix-daemon
+<user01> hah, I don't have screen installed yet  [07:11]
+<pjotrp> comes with guix ;)  [07:12]
+<pjotrp> no worries, you can run it any way you want
+<pjotrp> $HOME/.guix-profile/bin/guix-daemon --build-users-group=guixbuild 
+<user01> then something's weird, because it says I don't have it
+<pjotrp> oh, you need to install it first  [07:13]
+<pjotrp> guix package -A screen
+<pjotrp> screen  4.3.1   out     gnu/packages/screen.scm:34:2
+<pjotrp> but you can skip this install, for now
+<user01> alright  [07:14]
+<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix
+         package -i genenetwork2 --dry-run
+<pjotrp> substitute: updating list of substitutes from
+         'https://mirror.hydra.gnu.org'...  79.1%
+<pjotrp> you see that?
+<pjotrp> followed by  [07:15]
+substitute: updating list of substitutes from
+'https://hydra.gnu.org'... 100.0%
+The following derivations would be built:
+   /gnu/store/rk7nw0rjqqsha958m649wrykadx6mmhl-profile.drv
+
+/gnu/store/7b0qjybvfx8syzvfs7p5rdablwhbkbvs-module-import-compiled.drv
+   /gnu/store/cy9zahbbf23d3cqyy404lk9f50z192kp-module-import.drv
+   /gnu/store/ibdn603i8grf0jziy5gjsly34wx82lmk-gtk-icon-themes.drv
+
+<pjotrp> which should have the same HASH values /gnu/store/7b0qjybvf... etc.
+                                                                        [07:16]
+<user01> profile has a different hash
+<pjotrp> but the next ones?
+<user01> they're the same
+<pjotrp> not sure why profile differs. Do you see the contact with
+         mirror.hydra.org?  [07:17]
+<user01> yeah
+<pjotrp> OK, that means you set the key correctly for that one :)
+<pjotrp> alright we are at the same state now. You can see most packages need
+         to be rebuild because they are no longer cached as binaries on hydra
+                                                                        [07:18]
+<pjotrp> things move fast...
+<user01> hehe
+<pjotrp> let me also do the same on my laptop - which I have staged before
+                                                                        [07:19]
+<pjotrp> btw, to set the path I often do  [07:20]
+<pjotrp> export
+         PATH="/home/wrk/.guix-profile/bin:/home/wrk/.guix-profile/sbin":$PATH
+<pjotrp> to keep things like 'screen' from Debian
+<pjotrp> Once past building guix itself that is normally OK  [07:21]
+<user01> ah, okay
+<user01> will do that
+<pjotrp> the guix build requires certain versions of tools, so you don't want
+         to mix foreign tools in  [07:23]
+<user01> makes sense  [07:24]
+<pjotrp> On my laptop I am trying the main updating list of substitutes from
+         'http://hydra.gnu.org'...  10.5%  [07:27]
+<pjotrp> it is a bit slow, but let's see if there is a difference with the
+         mirror
+<pjotrp> you can see there are two servers here. Actually with recent daemons,
+         if the mirror fails it will try the main server  [07:28]
+<pjotrp> I documented the use of a caching server here  [07:29]
+<pjotrp> https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org
+<pjotrp> this is exactly what we are doing now
+<user01> alrighty  [07:35]
+<pjotrp> To see if a remote server has a guix server running it should respond 
+                                                                        [07:36]
+<pjotrp> lynx http://guix.genenetwork.org:8080 --dump
+<pjotrp> Resource not found: /
+<pjotrp> 
+<pjotrp> you see that?
+<user01> yes  [07:37]
+<pjotrp> good. The main hydra server is too slow. So on my laptop I forced
+         using the mirror with  [07:38]
+<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env guix
+         package -i genenetwork2 --dry-run
+         --substitute-urls="http://mirror.hydra.gnu.org"
+<pjotrp> 
+<pjotrp> the list looks the same to me  [07:40]
+<user01> me too
+<pjotrp> note that some packages will be built and some downloaded, right?
+                                                                        [07:41]
+<user01> yes
+<pjotrp> atlas is actually a binary on my system  [07:43]
+<pjotrp> I mean in that list
+<pjotrp> so, it should not build. Same as yours?
+<user01> yeah, atlas and r-gtable are the ones to be downloaded
+<pjotrp> You should not have seen that error ;)
+<pjotrp> we should try and install it this way, try  [07:44]
+<pjotrp>  env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix
+         package -i genenetwork2 --cores=4 --max-jobs=4 --keep-going  [07:46]
+<pjotrp> set CPUs and max-jobs to something sensible
+<pjotrp> Does your VM have multiple cores?
+<pjotrp> note you can always press Ctrl-C during install
+<user01> it doesn't, I'll reboot it and give it another core  [07:47]
+<user02> Hey  [07:48]
+<user02> I'm here
+<user02> Will be stepping away for some breakfast
+<pjotrp> Can you do the same as us
+<pjotrp> Can you see the irc log
+<user02> Alright
+<user02> Yes, I can
+<user02> Please email me a copy in five minutes
+<pjotrp> user01: so when I use the GN server  [07:56]
+<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix
+         package -i genenetwork2 --dry-run
+         --substitute-urls=http://guix.genenetwork.org:8080
+<pjotrp> I don't need to build anything  [07:57]
+<pjotrp> (this won't work for you, yet)
+<pjotrp> to get it to work you need to 'trust' it   [07:58]
+<pjotrp> but, first get the build going
+<pjotrp> I'll have a coffee while you and get building
+<user01> yeah it's doing its thing now  [08:01]
+<pjotrp> cool  [08:02]
+<pjotrp> in a separate terminal you can try and install with the gn mirror
+                                                                        [08:05]
+<pjotrp> I'll  send you the public key and you can paste it as said
+         https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org
+                                                                        [08:06]
+<user01> alright
+<pjotrp> should be in the E-mail  [08:09]
+<pjotrp> getting it working it kinda nasty since the server gives no feedback
+<pjotrp> it works when you see no more in the build list ;)  [08:11]
+<pjotrp> btw, you can install software in parallel. Guix does that.
+<pjotrp> even the same packages
+<pjotrp> so keep building ;)
+<pjotrp> try and do this with Debian...
+<pjotrp> coffee for me  [08:12]
+<user01> the first build failed                                                                        [08:15]
+<pjotrp> OK, Dennis fixed that one yesterday  [08:27]
+<pjotrp> the problem is that sometime source tarballs disappear  [08:28]
+<pjotrp> R is notorious for that
+<user01> haha, that's inconvenient..
+<pjotrp> well, it is good that Guix catches them
+<pjotrp> but we do not cache sources
+<pjotrp> binaries are cached - to some degree - so we don't have to rebuild
+         those  [08:29]
+<pjotrp> time to use the guix cache at guix.genenetwork.org
+<pjotrp> try and install the key (it is in the E-mail)
+<pjotrp> and see what this lists  [08:31]
+<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics ./pre-inst-env guix
+         package -i genenetwork2
+         --substitute-urls=http://guix.genenetwork.org:8080 --dry-run
+<pjotrp> should be all binary installs
+<user01> it's not..  [08:32]
+<user01> if I remove --substitute-urls, the list changes, does that mean I
+             have the key set up correctly at least?  [08:33]
+<pjotrp> dunno  [08:35]
+<pjotrp> how many packages does it want to build?
+<pjotrp> should be zero
+<user01> four
+<pjotrp> Ah, that is OK - those are default profile things
+<user01> genenetwork2 is among the ones to be downloaded so  [08:36]
+<pjotrp> remove --dry-run
+<pjotrp> yeah, good sign :)
+<pjotrp> we'll still hit a snag, but run it
+<pjotrp> should be fast
+<user01> doing it  [08:37]
+<user01> it worked!  [08:38]
+<user01> I think  [08:39]
+<pjotrp> heh  [08:40]
+<pjotrp> you mean it is finished?
+<user01> yep
+<pjotrp> type genenetwork2
+<user01> complains about not being able to connect to the database  [08:41]
+<pjotrp> last snag :)
+<pjotrp> no database
+<pjotrp> well, we succeeded in installing a same-byte install of a very
+         complex system :)  [08:42]
+<pjotrp> (always take time to congratulate yourself)
+<pjotrp> now we need to install mysql
+<user01> hehe :)
+<pjotrp> this can be done throug guix or through debian  [08:43]
+<pjotrp> the latter is a bit easier here, so let's do that
+<pjotrp> fun note: you can mix debian and guix
+<pjotrp> Follow instructions on   [08:44]
+<pjotrp>
+         https://github.com/genenetwork/genenetwork2/tree/staging/doc#run-mysql-server
+<pjotrp> apt-get install mysql-common  [08:45]
+<pjotrp> may do it
+<pjotrp> You can also install with guix, but I need to document that
+<pjotrp> btw your internet must be fast :)  [08:46]
+<user01> hehe it is ;)
+<pjotrp> when the database is installed  [08:48]
+<pjotrp> be sure to set the password as instructed  [08:50]
+<pjotrp> when mysql is set the genenetwork2 command should fire up the web
+         server on localhost:5003  [08:58]
+<pjotrp> btw my internet is way slower :)  [09:00]
+<user02> I'm back  [09:04]
+<user02> fixed router firmware upgrade problem
+<user02> unbricking
+<pjotrp> tssk  [09:07]
+<user02> I'll never leave routers to update themselves again  [09:08]
+<user02> self-brick highway
+<user02> Resuming  [09:09]
+<pjotrp> auto-updates are evil
+<pjotrp> always switch them off
+<pjotrp> user02: can you install genenetwork like user has done?  [09:10]
+<pjotrp> pretty well documented here now :)
+<user02> Yes I can  [09:11]
+<user02> Already installed key
+<pjotrp> user02: you are getting binary packages only now?  [09:13]
+<user02> That's the sanest way to go now
+<user02> seriously
+<pjotrp> everything should be pre-built from guix.genenetwork.org
+<pjotrp> you are downloading?
+<user02> yes  [09:15]
+<pjotrp> cool. Maybe an idea to set up a server 
+<pjotrp> for your own use
+<user02> Stuck at downloading preprocesscore
+<pjotrp> should not  [09:24]
+<pjotrp> what does env GUIX_PACKAGE_PATH=../guix-bioinformatics/
+         ./pre-inst-env guix package -i genenetwork2
+         --substitute-urls="http://guix.genenetwork.org:8080" --dry-run
+                                                                        [09:25]
+<pjotrp> say for r-prepocesscore
+<pjotrp> download or build?
+<pjotrp> mine says download  [09:26]
+<user02> it only lists the derivatives to be built
+<user02> nothing else happens  [09:27]
+<pjotrp> OK, so there is a problem
+<pjotrp> your key may not be working
+<pjotrp> everything should be listed as 'to be download'  [09:28]
+<user02> Hmm
+<user02> Ah
+<user02> I know where I messed up
+<pjotrp> where?
+<user02> I did add the key
+<user02> However
+<pjotrp> (I am documenting)
+<user02> I did not tell guix to trust it
+<pjotrp> yes
+<pjotrp> and there is another potential problem
+<user02> Remember the documentation on installing guix?
+<user02> You have to tell guix to trust the default key  [09:29]
+<user02> Right?
+<user02> So in this case
+<pjotrp> read the IRC log
+<user02> That step is mandatory
+<pjotrp> user01: how are you doing?
+<pjotrp> user02:
+         https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org#using-gnu-guix-archive
+                                                                        [09:30]
+<user01> a little bit left on the db download
+<pjotrp> user02: you should see no more building
+<pjotrp> user02: another issue may be that you updated r-preprocesscore
+         package in guix-buinformatics  [09:32]
+<pjotrp> all downstream packages will want to rebuild
+<user02> no, not really
+<user02> It's not even installed
+<pjotrp> checkout a branch of the the old version - make sure we are in synch
+<pjotrp> should be at
+         /gnu/store/y1f3r2xs3fhyadd46nd2aqbr2p9qv2ra-r-biocpreprocesscore-1.32.0
+                                                                        [09:33]
+<pjotrp> 
+<user03> pjotrp: Possibly we should use the archive utility of Guix to do
+        deployment to avoid such out-of-sync differences :)  [09:34]
+<pjotrp> maybe. I did not get archive to update profiles properly  [09:37]
+<pjotrp> Also it is good that they get to understand guix
+         this way
+<pjotrp> carved in stone, eh  [09:38]
+<user02> Yeah, all good  [09:39]
+<user02> My mistake was skipping the guix archive part
+<user02> Can we begin with the install?
+<user02> It's telling me of derivatives that will be downloaded  [09:40]
+<user02> So we're good
+<user02> Here goes
+<pjotrp> yeeha  [09:42]
+<user02> pjotrp, where is this guix.genenetwork.org located at?
+<pjotrp> Tennessee
+<user02> It's...it's....sloooooooowwwwwwwwwwwwww
+<pjotrp> not from Europe
+<pjotrp> is it downloading at all?
+<user02> It should be extended
+<user02> Yes...like at 100KB/s  [09:43]
+<user02> tear-jerker
+<user02> Verizon problems
+<user02> who's the host?
+<pjotrp> I am getting 500Kb/s
+<pjotrp> UT
+<user02> Guix's servers can run off more than one server, right?
+<user02> I'd like to host that particular server here
+<user02> For speed
+<pjotrp> yes
+<user02> Sooner or later
+<user02> It will be a necessity  [09:45]
+<pjotrp> exactly what I am doing - this is our server
+<pjotrp> guix.genenetwork.org:8080
+<user02> All done installing  [09:46]
+<pjotrp> what?
+<user02> Now the databases
+<pjotrp> what do you mean by slow exactly?
+<user02> Yes, it's installed
+<pjotrp> can you run genenetwork2
+<user02> setting variables
+<user02> If I try running it now, it will fail as I don't have the DBs  [09:47]
+<pjotrp> cool - you had a lot of prebuilt packages already
+<pjotrp> OK, follow the instructions I wrote above
+<user01> now everything seems to be working for me :)
+<user02> OK
+<pjotrp> user01: excellent!
+<pjotrp> you see a webserver?
+<user01> yep, can connect to localhost:5003  [09:48]
+<pjotrp> So now you are running a guix copy of GN2
+<pjotrp> you can see where it lives with `which genenetwork2` or ls -l
+         ~/.guix-profile/bin/genenetwork2  [09:49]
+<pjotrp>
+         /gnu/store/1kma5xszvzsvmbb4k699h7gvdncw901i-genenetwork2-2.0-a8fcff4/bin/genenetwork2
+<pjotrp> it is a script
+<pjotrp> written by guix, open it  [09:50]
+<pjotrp> inside it points to paths and our script at
+<pjotrp>
+         /gnu/store/1kma5xszvzsvmbb4k699h7gvdncw901i-genenetwork2-2.0-a8fcff4/bin/.genenetwork2-real
+<pjotrp> if you open that you can see how the webserver is started  [09:51]
+<pjotrp> next step is to run a recent version of GN2
+<user01> okay  [09:52]
+<pjotrp> See
+         https://github.com/genenetwork/genenetwork2/tree/staging/doc#run-your-own-copy-of-gn2
+<pjotrp> but do not checkout that genetwork2_diet
+<pjotrp> we reverted to the main tree
+<pjotrp> clone git@github.com:genenetwork/genenetwork2.git  [09:53]
+<pjotrp> instead and checkout the staging branch 
+<pjotrp> that is effectively my branch  [09:54]
+<pjotrp> when that is done you should be able to fire up the webserver from
+         there  [09:55]
+<pjotrp> using ./bin/genenetwork2
+<user02> now installing DBs
+<user02> Downloading
+<pjotrp> annoyingly the source tree is ~700Mb  [09:56]
+<user02> Can it also be done by installing the guix package
+         genenetwork2-database-small?
+<pjotrp> I changed it in the diet version to 8Mb, but I had to revert
+<user01> I need to make my VM bigger...
+<pjotrp> user02: not ready  [09:57]
+<user02> ok
+<pjotrp> user01: sorry
+<pjotrp> user01: you could mount a local dir inside the VM for development
+<pjotrp> that would allow you to use MAC tools for editing
+<pjotrp> just an idea
+<user01> yeah, I figure I'll do something like that
+<pjotrp> do you use emacs?  [09:58]
+<user01> yep
+<pjotrp> that can also run on remote files over ssh
+<pjotrp> that's an alternative
+<pjotrp> kudos for using emacs :), wdyt user03 
+<user02> 79 minutes to go downloading the db
+<pjotrp> user02: sorry about that  [09:59]
+<pjotrp> it is 2GB
+<user02> user, you can also mount the directory via sshfs
+<user02> Mac OSX runs OpenSSH
+<pjotrp> user02: sopa
+<user02> You can therefore mount a directory outside the VM to the VM via
+         sshfs  [10:00]
+<pjotrp> yes, 3 options now
+<user02> That way, you can set up a VM only for it's logic
+<user02> Apps + the OS it runs  [10:01]
+<user02> For data, let it reside on physical host accessible via sshfs
+<user02> Use this Arch wiki reference:
+         https://wiki.archlinux.org/index.php/SSHFS
+<user02> I edited that last somewhere in 2015, may have been updated since
+         then
+<user01> alright, cool!  [10:04]
+<pjotrp> user01: you are almost done  [10:06]
+<pjotrp> I wrote an elixir package for guix :)
+<pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env guix
+         package -A elixir
+         --substitute-urls="http://guix.genenetwork.org:8080"   [10:08]
+<pjotrp> elixir  1.2.3   out
+         ../guix-bioinformatics/gn/packages/elixir.scm:31:2
+<pjotrp> 
+<pjotrp> I am building it on guix.genenetwork.org right now  [10:09]
+<user01> nice  [10:10]
 #+end_src
+
+** Run GN2
+
+Make a note of the paths with
+
+#+begin_src bash
+./pre-inst-env guix package --search-paths
+#+end_src bash
+
+After setting the paths for the server
+
+#+begin_src bash
+export PATH=~/.guix-profile/bin:$PATH
+export PYTHONPATH="$HOME/.guix-profile/lib/python2.7/site-packages"
+export R_LIBS_SITE="$HOME/.guix-profile/site-library/"
+export GUIX_GTK3_PATH="$HOME/.guix-profile/lib/gtk-3.0"
+export GI_TYPELIB_PATH="$HOME/.guix-profile/lib/girepository-1.0"
+export XDG_DATA_DIRS="$HOME/.guix-profile/share"
+export GIO_EXTRA_MODULES="$HOME/.guix-profile/lib/gio/modules"
+#+end_src bash
+
+run the main script (in ~/.guix-profile/bin)
+
+#+begin_src bash
+genenetwork2
+#+end_src bash
+
+will start the default server which listens on port 5003, i.e.,
+http://localhost:5003/.
+
+** Run MySQL server
+
+At this point we require the underlying distribution to install
+and run mysqld. 
+
+Download one of
+
+http://files.genenetwork.org/raw_database/
+https://s3.amazonaws.com/genenetwork2/db_webqtl_s.zip
+
+Check the md5sum.
+
+After installation inflate the database binary in the MySQL directory
+(this is subject to change soon) 
+
+: chown -R mysql:mysql db_webqtl_s/
+: chmod 700 db_webqtl_s/
+: chmod 660 db_webqtl_s/*
+
+Restart the MySQL service (mysqld). Log in as root and run
+
+: mysql> show databases;
+: +--------------------+
+: | Database           |
+: +--------------------+
+: | information_schema |
+: | db_webqtl_s        |
+: | mysql              |
+: | performance_schema |
+: +--------------------+
+
+Set permissions and match password in your settings file below:
+
+: mysql> grant all privileges on db_webqtl_s.* to gn2@"localhost" identified by 'mysql_password';
+
+Note that if the mysql connection is not working, try connecting to
+the IP address and check server firewall, hosts.allow and mysql IP
+configuration.
+
+** Run your own copy of GN2
+
+At some point you may want to fix the source code. Assuming you have
+Guix and Genenetwork2 installed (as described above) clone the GN2
+repository from https://github.com/genenetwork/genenetwork2_diet
+
+Copy-paste the paths into your terminal (mainly so PYTHONPATH and
+R_LIBS_SITE are set) from the information given by guix:
+
+: guix package --search-paths
+
+Inside the repository:
+
+: cd genenetwork2
+: ./bin/genenetwork2 
+
+This will fire up your local repo at http://localhost:5003/ using the
+settings in ./etc/default_settings.py. These settings may 
+not reflect your system. To override settings create your own from a copy of
+default_settings.py and pass it into GN2 with
+
+: ./bin/genenetwork2 $HOME/my_settings.py
+
+and everything *should* work (note the full path to the settings
+file). This way we develop against the exact same dependency graph of
+software.
+
+If something is not working, take a hint from the settings file
+that comes in the Guix installation. It sits in something like
+
+: cat ~/.guix-profile/lib/python2.7/site-packages/genenetwork2-2.0-py2.7.egg/etc/default_settings.py
+
+** Set up nginx port forwarding
+
+nginx can be used as a reverse proxy for GN2. For example, we want to
+expose GN2 on port 80 while it is running on port 5003. Essentially
+the configuration looks like
+
+#+begin_src js
+    server {
+        listen 80;
+        server_name test-gn2.genenetwork.org;
+        access_log  logs/test-gn2.access.log;
+
+        proxy_connect_timeout       3000;
+        proxy_send_timeout          3000;
+        proxy_read_timeout          3000;
+        send_timeout                3000;
+
+        location / {
+            proxy_set_header   Host      $http_host;
+            proxy_set_header   Connection keep-alive;
+            proxy_set_header   X-Real-IP $remote_addr;
+            proxy_set_header   X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header   X-Forwarded-Host $server_name;
+            proxy_pass         http://127.0.0.1:5003;
+        }
+}
+#+end_src js
+
+Install the nginx webserver (as root)
+
+: guix package -i nginx
+
+Example nginx configuration files can be found in the Guix
+store through
+
+: ls -l /root/.guix-profile/sbin/nginx
+: lrwxrwxrwx 3 root guixbuild 66 Dec 31  1969 /root/.guix-profile/sbin/nginx -> /gnu/store/g0wrcl5z27rmk5b52rldzvk1bzzbnz2l-nginx-1.8.1/sbin/nginx
+
+Use that path
+
+: ls /gnu/store/g0wrcl5z27rmk5b52rldzvk1bzzbnz2l-nginx-1.8.1/share/nginx/conf/
+:   fastcgi.conf            koi-win             scgi_params
+:   fastcgi.conf.default    mime.types          scgi_params.default
+:   fastcgi_params          mime.types.default  uwsgi_params
+:   fastcgi_params.default  nginx.conf          uwsgi_params.default
+:   koi-utf                 nginx.conf.default  win-utf
+
+And copy any relevant files to /etc/nginx.  A configuration file for
+GeneNetwork (reverse proxy) port forwarding can be found in the source
+repository under ./etc/nginx-genenetwork.conf. Copy this file to /etc/nginx
+(still as root):
+: cp ./etc/nginx-genenetwork.conf /etc/nginx/
+
+Make dirs
+
+: mkdir -p /var/spool/nginx/logs
+
+Add users
+
+: adduser nobody ; addgroup nobody 
+
+Run nginx
+
+: /root/.guix-profile/sbin/nginx -c /etc/nginx/nginx-genenetwork.conf -p /var/spool/nginx
+
+* Source deployment and other information on reproducibility
+
+See the document [[GUIX-Reproducible-from-source.org]].
+
+* Trouble shooting
+
+** ImportError: No module named jinja2
+
+If you have all the Guix packages installed this error points out that
+the environment variables are not set. Copy-paste the paths into your
+terminal (mainly so PYTHONPATH and R_LIBS_SITE are set) from the
+information given by guix:
+
+: guix package --search-paths
+
+On one system:
+
+: export PYTHONPATH="$HOME/.guix-profile/lib/python2.7/site-packages"
+: export R_LIBS_SITE="$HOME/.guix-profile/site-library/"
+: export GEM_PATH="$HOME/.guix-profile/lib/ruby/gems/2.2.0"
+
+and perhaps a few more. 
+** ERROR: can not find directory $HOME/gn2_data
+
+The default settings file looks in your $HOME/gn2_data. Since these
+files come with a Guix installation you should take a hint from the
+values in the installed version of default_settings.py (see above in
+this document).
+
+** Can't run a module
+
+In rare cases, development modules are not brought in with Guix
+because no source code is available. This can lead to missing modules
+on a running server. Please check with the authors when a module
+is missing.
diff --git a/doc/database.org b/doc/database.org
new file mode 100644
index 00000000..e06ac1ff
--- /dev/null
+++ b/doc/database.org
@@ -0,0 +1,710 @@
+- github Document reduction issue
+
+
+* GeneNetwork Database
+
+** Estimated table sizes 
+
+
+select table_name,round(((data_length + index_length) / 1024 / 1024), 2) `Size in MB` from information_schema.TABLES where table_schema = "db_webqtl" order by data_length;
+
++-------------------------+------------+
+| table_name              | Size in MB |
++-------------------------+------------+
+| ProbeSetData            |   59358.80 |
+| SnpAll                  |   15484.67 |
+| ProbeData               |   22405.44 |
+| SnpPattern              |    9177.05 |
+| ProbeSetSE              |   14551.02 |
+| QuickSearch             |    5972.86 |
+| ProbeSetXRef            |    4532.89 |
+| LCorrRamin3             |   18506.53 |
+| ProbeSE                 |    6263.83 |
+| ProbeSet                |    2880.21 |
+| Probe                   |    2150.30 |
+| GenoData                |    3291.91 |
+| CeleraINFO_mm6          |     989.80 |
+| pubmedsearch            |    1032.50 |
+| ProbeXRef               |     743.38 |
+| GeneRIF_BASIC           |     448.54 |
+| BXDSnpPosition          |     224.44 |
+| EnsemblProbe            |     133.66 |
+| EnsemblProbeLocation    |     105.49 |
+| Genbank                 |      37.71 |
+| TissueProbeSetData      |      74.42 |
+| AccessLog               |      42.38 |
+| GeneList                |      34.11 |
+| Geno                    |      33.90 |
+| MachineAccessLog        |      28.34 |
+| IndelAll                |      22.42 |
+| PublishData             |      22.54 |
+| TissueProbeSetXRef      |      14.73 |
+| ProbeH2                 |      13.26 |
+| GenoXRef                |      22.83 |
+| TempData                |       8.35 |
+| GeneList_rn3            |       5.54 |
+| GORef                   |       4.97 |
+| Phenotype               |       6.50 |
+| temporary               |       3.59 |
+| InfoFiles               |       3.32 |
+| Publication             |       3.42 |
+| Homologene              |       5.69 |
+| Datasets                |       2.31 |
+| GeneList_rn33           |       2.61 |
+| PublishSE               |       4.71 |
+| GeneRIF                 |       2.18 |
+| Vlookup                 |       1.87 |
+| H2                      |       2.18 |
+| PublishXRef             |       2.18 |
+| NStrain                 |       4.80 |
+| IndelXRef               |       2.91 |
+| Strain                  |       1.07 |
+| GeneMap_cuiyan          |       0.51 |
+| user_collection         |       0.30 |
+| CaseAttributeXRef       |       0.44 |
+| StrainXRef              |       0.56 |
+| GeneIDXRef              |       0.77 |
+| Docs                    |       0.17 |
+| News                    |       0.17 |
+| ProbeSetFreeze          |       0.22 |
+| GeneRIFXRef             |       0.24 |
+| Sample                  |       0.06 |
+| login                   |       0.06 |
+| user                    |       0.04 |
+| TableFieldAnnotation    |       0.05 |
+| DatasetMapInvestigator  |       0.05 |
+| User                    |       0.04 |
+| ProbeFreeze             |       0.06 |
+| TableComments           |       0.02 |
+| Investigators           |       0.02 |
+| DBList                  |       0.03 |
+| Tissue                  |       0.02 |
+| GeneChip                |       0.01 |
+| GeneCategory            |       0.01 |
+| SampleXRef              |       0.01 |
+| InbredSet               |       0.01 |
+| SnpAllele_to_be_deleted |       0.00 |
+| Organizations           |       0.01 |
+| PublishFreeze           |       0.00 |
+| GenoFreeze              |       0.00 |
+| Chr_Length              |       0.01 |
+| SnpSource               |       0.00 |
+| AvgMethod               |       0.00 |
+| Species                 |       0.00 |
+| Dataset_mbat            |       0.00 |
+| TissueProbeFreeze       |       0.00 |
+| EnsemblChip             |       0.00 |
+| TissueProbeSetFreeze    |       0.01 |
+| UserPrivilege           |       0.00 |
+| CaseAttribute           |       0.00 |
+| MappingMethod           |       0.00 |
+| DBType                  |       0.00 |
+| InfoFilesUser_md5       |       0.00 |
+| GenoCode                |       0.00 |
+| DatasetStatus           |       0.00 |
+| GeneChipEnsemblXRef     |       0.00 |
+| GenoSE                  |       0.00 |
+| user_openids            |       0.00 |
+| roles_users             |       0.00 |
+| role                    |       0.00 |
+| Temp                    |       NULL |
++-------------------------+------------+
+97 rows in set, 1 warning (0.01 sec)
+
+All *Data tables are large
+
+** User access
+
+According to the meta data:
+
+This table tracks access time and IP addresses. Used for logging in
+registered users and tracking cookies.
+
+# GN1 uses access table and GN2 uses user table (true/false?)
+
+ select * from AccessLog limit 5;
++-------+---------------------+----------------+
+| id    | accesstime          | ip_address     |
++-------+---------------------+----------------+
+| 12174 | 2003-10-28 02:17:41 | 130.120.104.71 |
+| 12173 | 2003-10-28 02:16:27 | 130.120.104.71 |
+|     3 | 2003-02-22 07:38:33 | 192.117.159.1  |
+|     4 | 2003-02-22 07:49:13 | 192.117.159.1  |
+|     5 | 2003-02-22 07:51:08 | 192.117.159.1  |
++-------+---------------------+----------------+
+
+select * from AccessLog order by accesstime desc limit 5;
++---------+---------------------+---------------+
+| id      | accesstime          | ip_address    |
++---------+---------------------+---------------+
+| 1025735 | 2016-02-08 14:23:29 | 100.43.81.157 |
+| 1025734 | 2016-02-08 13:54:28 | 180.76.15.144 |
+| 1025733 | 2016-02-08 13:43:37 | 66.249.65.217 |
+| 1025732 | 2016-02-08 13:39:50 | 66.249.65.217 |
+| 1025731 | 2016-02-08 13:15:46 | 66.249.65.217 |
++---------+---------------------+---------------+
+
+Quite a few trait page hits:
+
+select count(*) from AccessLog;
+
++----------+
+| count(*) |
++----------+
+|  1025685 |
++----------+
+
+show indexes from AccessLog;
++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table     | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| AccessLog |          0 | PRIMARY  |            1 | id          | A         |     1025685 |     NULL | NULL   |      | BTREE      |         |               |
++-----------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+This table is being used by both GN1 and GN2 from the trait pages!
+
+: grep -ir AccessLog *|grep -e "^gn1\|^gn2"|grep \.py|grep -v doc
+
+gn1/web/webqtl/showTrait/ShowTraitPage.py:              query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
+gn1/web/webqtl/showTrait/ShowTraitPage.py:                      self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)" ,user_ip)
+gn1/web/webqtl/textUI/cmdClass.py:                      query = """SELECT count(id) FROM AccessLog WHERE ip_address = %s AND UNIX_TIMESTAMP()-UNIX_TIMESTAMP(accesstime)<86400"""
+gn1/web/webqtl/textUI/cmdClass.py:                      query = """INSERT INTO AccessLog(accesstime,ip_address) values(Now(),%s)""" 
+gn2/wqflask/wqflask/show_trait/show_trait_page.py:        query = "SELECT count(id) FROM AccessLog WHERE ip_address = %s and \
+gn2/wqflask/wqflask/show_trait/show_trait_page.py:        self.cursor.execute("insert into AccessLog(accesstime,ip_address) values(Now(),%s)", user_ip)
+
+Looking at the code in GN1 and GN2, this table is used to restrict the
+daily use of the trait data page (set to 1,000 - whoever reaches
+that?). Contrary to what the schema description says, this table does
+*not* keep track of cookies.
+
+From the code it looks like GN2 uses a mixture of Redis and sqlalchemy
+to keep track of logged in sessions (see
+gn2/wqflask/wqflask/user_manager.py) and cookies through a user_uuid in 
+model.py.
+
+In gn2/wqflask/wqflask/templates/collections/view_anonymous.html
+show_trait_page appears to be loaded (need to check).
+
+** AvgMethod 
+
+Probesetfreeze refers to AvgMethod
+
+** BXDSnpPosition
+
+Snp table (all snps)
+
+Mapping in GN1 shows snps when  you select a chromosome.
+
+** CaseAttribute(XRef)
+
+Metadata
+
+** CeleraINFO_mm6
+
+?
+
+** Chr_Length
+
+Default mm9, column for mm8
+
+** Dataset_mbat
+
+Menu for BXD (linkouts)
+
+** DatasetMapInvestigator
+
+Arthur?
+
+** Datasets
+
+Information/metadata
+
+** DatasetStatus
+
+Arthur private/public
+
+** DBList and DBType
+
+Hooked in API (URL encoding)
+
+** Docs
+
+GN2 only (see menu bar)
+
+** Ensembl* 
+
+Probe information
+
+(will be deprecated)
+
+** Genbank
+
+Linkout and not important
+
+** GeneCategory
+
+Not important. GeneWiki notes function classification.
+
+Deprecate.
+
+** GeneChip
+
+** GeneIDXRef
+
+Interspecies gene comparison
+
+** GeneList
+
+Track info
+
+** GeneList_rn3(3)
+
+Rat list
+
+** GeneMap_cuiyan
+
+Link outs
+
+** GeneRIF
+
+Wiki info (nightly updated from NCBI)
+
+XRef should be foreign keys
+
+** Geno
+
+SNP or marker info
+
+** GenoCode
+
+Belongs to someone else
+
+** GenoData
+
+Allele info
+
+** GenoFreeze
+
+Big menu (Freeze refers to menu)
+
+** GenoSE
+
+SE standard err, not used
+
+** GenoXRef
+
+Very important. Key links between Geno, GenoData
+
+** GORef
+
+GO terms
+
+** H2
+
+Heritability for probeset(?)
+
+** Homologene
+
+Homology, not used much
+
+** InbredSet
+
+Group in menu
+
+** IndelAll, SnpAll, SnpPattern, SnpSource
+
+Indel Snp browser (variant browser Gn1)
+
+** Info*
+
+Infra system PhP 
+
+Data Info button 
+
+Infosystem users has separate entries
+
+Also Investigators, User, Organizations, 
+
+** LCorrRamin3 
+
+Lit. Correlations Prof. Ramin
+
+** Login
+
+GN2 login info
+
+** MachineAccessLog
+
+Old
+
+** MappingMethod
+
+GN1
+
+** News
+
+GN2
+
+** NStrain
+
+pheno           publishfreeze (menu)
+                       xref (keys)
+                          xref links to publish (pubmed), phenotype, publishdata
+geno            genofreeze
+                       xref (keys)
+                          xref links to publish (pubmed), genotype, genodata
+probeset/expr.  probesetfreeze
+                       xref (keys)
+                          xref links to publish (pubmed), probeset, probesetdata
+probe/expr.  probefreeze
+                       xref (keys)
+                          xref links to publish (pubmed), probe, probedata
+
+Each dataset has 3 values (real value (1), number of samples (2), stderr (3))
+
+NStrain = number of phenotype samples
+
+ProbesetFreeze contains all data, incl. metabolomic.
+
+** Phenotype
+
+This table contains names, full descriptions, and short symbols for
+traits and phenotype used primarily in the Published Phenotypes
+databases.
+
+Contains 10k rows (March 2016, of which 5000 are for the BXDs).
+
+| Id | Pre_publication_description | Post_publication_description                                                                                         | Original_description                                                                                                                                        | Units                | Pre_publication_abbreviation | Post_publication_abbreviation | Lab_code | Submitter   | Owner | Authorized_Users |
++----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+
+|  1 | NULL                        | Hippocampus weight                                                                                                   | Original post publication description: Hippocampus weight                                                                                                   | Unknown              | NULL                         | HPCWT                         | NULL     | robwilliams | NULL  | robwilliams      |
+|  2 | NULL                        | Cerebellum weight                                                                                                    | Original post publication description: Cerebellum weight                                                                                                    | mg                   | NULL                         | CBLWT                         | NULL     | robwilliams | NULL  | robwilliams      |
+|  3 | NULL                        | Interleukin 1 activity by peritoneal macrophages stimulated with 10 ug/ml lipopolysaccharide  [units/100 ug protein] | Original post publication description: Interleukin 1 activity by peritoneal macrophages stimulated with 10 ug/ml lipopolysaccharide  [units/100 ug protein] | units/100 ug protein | NULL                         | IL1Activity                   | NULL     | robwilliams | NULL  | robwilliams      |
+|  4 | NULL                        | Central nervous system, morphology: Cerebellum weight, whole, bilateral in adults of both sexes [mg]                 | Original post publication description: Cerebellum weight [mg]                                                                                               | mg                   | NULL                         | CBLWT2                        | NULL     | robwilliams | NULL  | robwilliams      |
+|  5 | NULL                        | The coat color of 79 BXD RI strain                                                                                   | Original post publication description: The coat color of 79 BXD RI strain                                                                                   | Unknown              | NULL                         | CoatColor                     | NULL     | robwilliams | NULL  | robwilliams      |
++----+-----------------------------+----------------------------------------------------------------------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------+------------------------------+-------------------------------+----------+-------------+-------+------------------+
+5 rows in set (0.00 sec)
+
+** ProbeData
+
+Table with fine-grained probe level Affymetrix data only. Contains 1
+billion rows March 2016. This table may be deletable since it is only
+used by the Probe Table display in GN1. Not used in GN2
+(double-check).
+
+In comparison the "ProbeSetData" table contains more molecular assay
+data, including probe set data, RNA-seq data, proteomic data, and
+metabolomic data. 2.5 billion rows March 2016. In comparison,
+ProbeData contains data only for Affymetrix probe level data
+(e.g. Exon array probes and M430 probes).
+
+"ProbeData.StrainId" should be "CaseId" or "SampleId".
+
+"ProbeData" should probably be "AssayData" or something more neutral.
+
+select * from ProbeData limit 2;
++--------+----------+---------+
+| Id     | StrainId | value   |
++--------+----------+---------+
+| 503636 |       42 | 11.6906 |
+| 503636 |       43 | 11.4205 |
++--------+----------+---------+
+2 rows in set (0.00 sec)
+
+select count(*) from ProbeData limit 2;
++-----------+
+| count(*)  |
++-----------+
+| 976753435 |
++-----------+
+1 row in set (0.00 sec)
+
+** ProbeSet
+
+Comment: PLEASE CHANGE TABLE NAME and rework fields carefully. This is
+a terrible table but it works well (RWW March 2016). It is used in
+combination with the crucial TRAIT DATA and ANALYSIS pages in GN1 and
+GN2. It is also used by annotators using the UPDATE INFO AND DATA web
+form to correct and update annotation. It is used by Arthur to enter
+new annotation files and metadata for arrays, genes, proteins,
+metabolites. The main problem with this table is that it is doing too
+much work.
+
+Initially (2003) this table contained only Affymetrix ProbeSet data
+for mouse (U74aV2 initially). Many other array platforms for different
+species were added. At least four other major categories of molecular
+assays have been added since about 2010.
+
+1. RNA-seq annotation and sequence data for transcripts using ENSEMBL
+   identifiers or NCBI NM_XXXXX and NR_XXXXX type identifiers
+
+2. Protein and peptide annotation and sequence data (see BXD Liver
+   Proteome data, SRM and SWATH type data) with identifiers such as
+   "abcb10_q9ji39_t311" for SRM data and "LLGNMIVIVLGHHLGKDFTPAAQAA"
+   for SWATH data where the latter is just the peptide fragment that
+   has been quantified. Data first entered in 2015 for work by Rudi
+   Aebersold and colleagues.
+
+3. Metabolite annotation and metadata (see BXD Liver Metabolome data)
+   with identifiers that are usually Mass charge ratios such as
+   "149.0970810_MZ"
+
+4. Epigenomic and methylome data (e.g. Human CANDLE Methylation data
+   with identifiers such as "cg24523000")
+
+It would make good sense to break this table into four or more types
+of molecular assay metadata or annotation tables (AssayRNA_Anno,
+AssayProtein_Anno, AssayMetabolite_Anno, AssayEpigenome_Anno,
+AssayMetagenome_Anno), since these assays will have many differences
+in annotation content compared to RNAs.
+
+Some complex logic is used to update contents of this table when
+annotators modify and correct the information (for example, updating
+gene symbols). These features were requested by Rob so that annotating
+one gene symbol in one species would annotate all gene symbols in the
+same species based on a common NCBI GeneID number. For example, changing
+the gene alias for one ProbeSet.Id will change the list of aliases in
+all instances with the same gene symbol.
+
+If the ProbeSet.BlatSeq (or is this ProbeSet.TargetSeq?) is identical
+between different ProbeSet.Ids then annotation is forced to be the
+same even if the symbol or geneID is different. This "feature" was
+implemented when we found many probe sets with identical sequence but
+different annotations and identifiers.
+
+
+select count(*) from ProbeSet limit 5;
++----------+
+| count(*) |
++----------+
+|  4351030 |
++----------+
+
+| Id   | ChipId | Name     | TargetId | Symbol | description                                  | Chr  | Mb        | alias    | GeneId | GenbankId | SNP  | BlatSeq                                                                                                                                                                      | TargetSeq                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             | UniGeneId | Strand_Probe | Strand_Gene | OMIM   | comments | Probe_set_target_region | Probe_set_specificity | Probe_set_BLAT_score | Probe_set_Blat_Mb_start | Probe_set_Blat_Mb_end | Probe_set_strand | Probe_set_Note_by_RW | flag | Symbol_H | description_H | chromosome_H | MB_H | alias_H | GeneId_H | chr_num | name_num | Probe_Target_Description | RefSeq_TranscriptId | Chr_mm8 | Mb_mm8    | Probe_set_Blat_Mb_start_mm8 | Probe_set_Blat_Mb_end_mm8 | HomoloGeneID | Biotype_ENS | ProteinID | ProteinName | Flybase_Id | HMDB_ID | Confidence | ChEBI_ID | ChEMBL_ID | CAS_number | PubChem_ID | ChemSpider_ID | UNII_ID | EC_number | KEGG_ID | Molecular_Weight | Nugowiki_ID | Type | Tissue | PrimaryName | SecondaryNames | PeptideSequence |
++------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+
+| 7282 |      1 | 93288_at | NULL     | Arpc2  | actin related protein 2/3 complex, subunit 2 | 1    | 74.310961 | AK008777 | 76709  | AI835883  |    0 | CCGACTTCCTTAAGGTGCTCAACCGGACTGCTTGCTACTGGATAATCGTGAGGGATTCTCCATTTGGGTTCCATTTTGTACGAGTTTGGCAAATAACCTGCAGAAACGAGCTGTGCTTGCAAGGACTTGATAGTTCCTAATCCTTTTCCAAGCTGTTTGCTTTGCAATATGT | ccgacttccttaaggtgctcaaccgtnnnnnnccnannnnccnagaaaaaagaaatgaaaannnnnnnnnnnnnnnnnnnttcatcccgctaactcttgggaactgaggaggaagcgctgtcgaccgaagnntggactgcttgctactggataatcgtnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnntgagggattctccatttgggttccattttgtacgagtttggcaaataacctgcagaaacgagctgtgcttgcaaggacttgatagttcctaagaattanaanaaaaaaaanaanttccacttgatcaanttaattcccttttatttttcctccctcantccccttccttttccaagctgtttgctttgcaatatgt                                                                                                                                                                                                                                     | Mm.337038 | +            |             | 604224 |          | NULL                    |                  8.45 |                  169 |               74.310961 |              74.31466 | NULL             | NULL                 | 3    | NULL     | NULL          | NULL         | NULL | NULL    | NULL     |       1 |    93288 | NULL                     | XM_129773           | 1       | 74.197594 |                   74.197594 |                 74.201293 | 4187         | NULL        | NULL      | NULL        | NULL       | NULL    |       NULL |     NULL | NULL      | NULL       |       NULL |          NULL | NULL    | NULL      | NULL    |             NULL |        NULL | NULL | NULL   | NULL        | NULL           | NULL            |
++------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+
+2 rows in set (0.00 sec)
+
+
+
+
+** ProbeSetData
+
+Probedata - main molecular data. Probesets, metabolome, 
+
+Almost all important molecular assay data is in this table including
+probe set data, RNA-seq data, proteomic data, and metabolomic
+data. 2.5 billion rows March 2016. In comparison, ProbeData contains
+data only for Affymetrix probe level data (e.g. Exon array probes and
+M430 probes).
+
+select count(*) from ProbeSetData limit 5;
++---------------+
+| count(*)      |
++---------------+
+| 2,510,566,472 |
++---------------+
+
+
+select * from ProbeSetData limit 5;
++----+----------+-------+
+| Id | StrainId | value |
++----+----------+-------+
+|  1 |        1 | 5.742 |
+|  1 |        2 | 5.006 |
+|  1 |        3 | 6.079 |
+|  1 |        4 | 6.414 |
+|  1 |        5 | 4.885 |
++----+----------+-------+
+
+show indexes  from ProbeSetData;
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table        | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| ProbeSetData |          0 | DataId   |            1 | Id          | A         |    34868978 |     NULL | NULL   |      | BTREE      |         |               |
+| ProbeSetData |          0 | DataId   |            2 | StrainId    | A         |  2510566472 |     NULL | NULL   |      | BTREE      |         |               |
++--------------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+select * from Strain limit 5;
++----+----------+----------+-----------+--------+-------+
+| Id | Name     | Name2    | SpeciesId | Symbol | Alias |
++----+----------+----------+-----------+--------+-------+
+|  1 | B6D2F1   | B6D2F1   |         1 | NULL   | NULL  |
+|  2 | C57BL/6J | C57BL/6J |         1 | B6J    | NULL  |
+|  3 | DBA/2J   | DBA/2J   |         1 | D2J    | NULL  |
+|  4 | BXD1     | BXD1     |         1 | NULL   | NULL  |
+|  5 | BXD2     | BXD2     |         1 | NULL   | NULL  |
++----+----------+----------+-----------+--------+-------+
+
+show indexes from Strain;
++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Table  | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment |
++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+| Strain |          0 | PRIMARY  |            1 | Id          | A         |       14368 |     NULL | NULL   |      | BTREE      |         |               |
+| Strain |          0 | Name     |            1 | Name        | A         |       14368 |     NULL | NULL   | YES  | BTREE      |         |               |
+| Strain |          0 | Name     |            2 | SpeciesId   | A         |       14368 |     NULL | NULL   |      | BTREE      |         |               |
+| Strain |          1 | Symbol   |            1 | Symbol      | A         |       14368 |     NULL | NULL   | YES  | BTREE      |         |               |
++--------+------------+----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
+
+A typical query may look like
+
+SELECT Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id
+                    FROM (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                    left join ProbeSetSE on
+                      (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                    WHERE
+                      ProbeSetFreeze.name = 'B139_K_1206_M' AND
+                      ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                      ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                      ProbeSetXRef.DataId = ProbeSetData.Id AND
+                      ProbeSetData.StrainId = Strain.Id
+                    Order BY Strain.Name
+
++-------+-------+-------+----------+
+| Name  | value | error | Id       |
++-------+-------+-------+----------+
+| SM001 |  38.3 |  NULL | 25309550 |
+| SM001 |   2.7 |  NULL | 25309520 |
+| SM001 |  20.3 |  NULL | 25309507 |
+| SM001 | 125.8 |  NULL | 25309511 |
+| SM001 |   8.2 |  NULL | 25309534 |
++-------+-------+-------+----------+
+5 rows in set (22.28 sec)
+
+select * from ProbeSetFreeze limit 5;
++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+
+| Id | ProbeFreezeId | AvgID | Name        | Name2                           | FullName                                    | ShortName               | CreateTime | OrderList | public | confidentiality | AuthorisedUsers | DataScale |
++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+
+|  1 |             3 |     1 | Br_U_0803_M | BXDMicroArray_ProbeSet_August03 | UTHSC Brain mRNA U74Av2 (Aug03) MAS5        | Brain U74Av2 08/03 MAS5 | 2003-08-01 |      NULL |      0 |               0 | NULL            | log2      |
+|  2 |            10 |     1 | Br_U_0603_M | BXDMicroArray_ProbeSet_June03   | UTHSC Brain mRNA U74Av2 (Jun03) MAS5        | Brain U74Av2 06/03 MAS5 | 2003-06-01 |      NULL |      0 |               0 | NULL            | log2      |
+|  3 |             8 |     1 | Br_U_0303_M | BXDMicroArray_ProbeSet_March03  | UTHSC Brain mRNA U74Av2 (Mar03) MAS5        | Brain U74Av2 03/03 MAS5 | 2003-03-01 |      NULL |      0 |               0 | NULL            | log2      |
+|  4 |             5 |     1 | Br_U_0503_M | BXDMicroArray_ProbeSet_May03    | UTHSC Brain mRNA U74Av2 (May03) MAS5        | Brain U74Av2 05/03 MAS5 | 2003-05-01 |      NULL |      0 |               0 | NULL            | log2      |
+|  5 |             4 |     1 | HC_U_0303_M | GNFMicroArray_ProbeSet_March03  | GNF Hematopoietic Cells U74Av2 (Mar03) MAS5 | GNF U74Av2 03/03 MAS5   | 2003-03-01 |      NULL |      0 |               0 | NULL            | log2      |
++----+---------------+-------+-------------+---------------------------------+---------------------------------------------+-------------------------+------------+-----------+--------+-----------------+-----------------+-----------+
+
+ select * from ProbeSetXRef limit 5;
++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+
+| ProbeSetFreezeId | ProbeSetId | DataId | Locus_old  | LRS_old            | pValue_old | mean              | se                  | Locus           | LRS                | pValue | additive             | h2   |
++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+
+|                1 |          1 |      1 | 10.095.400 |   13.3971627898894 |      0.163 |  5.48794285714286 | 0.08525787814808819 | rs13480619      | 12.590069931048001 |  0.269 |          -0.28515625 | NULL |
+|                1 |          2 |      2 | D15Mit189  | 10.042057464356201 |      0.431 |  9.90165714285714 |  0.0374686634976217 | CEL-17_50896182 |   10.5970737900941 |  0.304 | -0.11678333333333299 | NULL |
+|                1 |          3 |      3 | D5Mit139   |   5.43678531742749 |      0.993 |  7.83948571428571 |  0.0457583416912569 | rs13478499      |    6.0970532702754 |  0.988 |    0.112957489878542 | NULL |
+|                1 |          4 |      4 | D1Mit511   |   9.87815279480766 |      0.483 | 8.315628571428569 |  0.0470396593931327 | rs6154379       | 11.774867551173099 |  0.286 |   -0.157113725490196 | NULL |
+|                1 |          5 |      5 | D16H21S16  | 10.191723834264499 |      0.528 |  9.19345714285714 |  0.0354801718293322 | rs4199265       | 10.923263374016202 |  0.468 |  0.11476470588235299 | NULL |
++------------------+------------+--------+------------+--------------------+------------+-------------------+---------------------+-----------------+--------------------+--------+----------------------+------+
+
+
+Note that the following unlimited search is very slow:
+
+select max(value) from ProbeSetData; 
+
++------------+
+| max(value) |
++------------+
+|   26436006 |
++------------+
+1 row in set (2 min 16.31 sec)
+
+which, in some form, is used in the search page; see [[https://github.com/genenetwork/genenetwork2_diet/blob/master/wqflask/wqflask/do_search.py#L811][the search code]].
+
+
+*** Improvements?
+
+Suggestions on the schema page:
+
+"StrainId" should be "CaseId" or "SampleId".
+
+"ProbeSetData" should probably be "AssayData" or something more neutral. 
+
+*** Comments
+
+I think the ProbeSetData table should be generalized to a 'phenotypes'
+table with a 'sample_id' column and a 'value' column.
+
+A new table 'samples' will link each sample to an 'experiment' and
+an 'individual', which in turn can link to a 'strain'.
+
+Experiment is meant here in a wide sense; GTEx can be one - I don't want to
+use dataset ;)
+
+This means a (slight) reordering:
+
+phenotypes:  (id), sample_id, value
+samples:     experiment_id, individual_id
+experiments: name, version
+individual:  strain_id
+strains:     species_id
+species:     ...
+
+ProbeData is also interesting, because it has the same structure as
+ProbeSetData, but only contains microarrays. These tables should be one
+(when we clear up the cross-referencing) as they both contain
+phenotype values. Both are large tables.
+
+PublishData is another phenotype table with values only which can be
+merged into that same table.
+
+So we have phenotype data in 3 tables with exactly the same
+layout. There is also TissueProbeSet*, but we'll ignore those for
+now. I think we should merge these into one and have the sample ref
+refer to the type of data (probeset, probe, metabolomics,
+whatever). These are all phenotype values and by having them split
+into different tables they won't play well when looking for
+correlations.
+
+ProbeSet contains the metadata on the probes and should (eventually)
+move into NoSQL. There is plenty of redundancy in that table now.
+
+I know it is going to be a pain to reorganize the database, but if we
+want to use it in the long run we are going to have to simplify it.
+
+
+
+** Publication and publishdata (all pheno)
+
+Phenotype pubs
+
+** QuickSearch
+
+No longer used
+
+** role
+
+empty
+
+** Sample*
+
+No longer used
+
+** Species & Strain (should be sample)
+
+Menu
+
+** InbredSet 
+
+Menu
+
+** TableComments
+
+Metadata on DB
+
+** Temp*
+
+User upload data
+
+** Tissue
+
+Menu - 3rd level
+
+** TissueP*
+
+Correlation tables
+
+** User collection
+
+User selection - retained
+
+** UserPrivilege
+
+** Vlookup 
+
diff --git a/doc/new_variable_names.txt b/doc/new_variable_names.txt
deleted file mode 100644
index c11c160e..00000000
--- a/doc/new_variable_names.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-RISet/riset -> group
-webqtlDataset.py -> data_set.py
-webqtlDataset (class object) -> DataSet
-database/db -> dataset/data_set
-DataEditingPage -> show_trait.py/show_trait.html
-webqtlTrait -> GeneralTrait
\ No newline at end of file
diff --git a/doc/notes_DA.txt b/doc/notes_DA.txt
deleted file mode 100644
index 410e0182..00000000
--- a/doc/notes_DA.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-Danny's notes about the genenetwork source
-
-Location of static files:
-
-Location of HTML templates:       wqflask/wqflask/templates/
-
-Entry point of the wqflask app:   wqflask/wqflask/__init__.py
-
-Application routes:               wqflask/wqflask/views.py
-
diff --git a/doc/gn_installation_notes.txt b/doc/old/gn_installation_notes.txt
index 584080f7..efea0309 100644
--- a/doc/gn_installation_notes.txt
+++ b/doc/old/gn_installation_notes.txt
@@ -96,7 +96,7 @@ Before installing from requirements.txt, install numpy separately:
 pip install numpy==1.7.0 (or whatever version we're using)
 
 Install from requirements.txt (after activating virtualenv):
-pip install -r gene/misc/requirements.txt
+pip install -r gene/doc/requirements.txt
 
 ===========================================
 
@@ -343,4 +343,4 @@ python runserver.py
 To do full upgrade (as opposed to apt-get upgrade)
 sudo aptitude full-upgrade
 
-===========================================
\ No newline at end of file
+===========================================
diff --git a/doc/notes.txt b/doc/old/notes.txt
index f8ce2759..f8ce2759 100644
--- a/doc/notes.txt
+++ b/doc/old/notes.txt
diff --git a/doc/requirements.txt b/doc/requirements.txt
deleted file mode 100644
index 39ee5652..00000000
--- a/doc/requirements.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-BeautifulSoup==3.2.1
-Flask==0.9
-Flask-Login==0.1.3
-Flask-Mail==0.7.6
-Flask-Principal==0.3.4
-Flask-SQLAlchemy==0.16
-Flask-Security==1.6.0
-Flask-WTF==0.8.3
-Jinja2==2.6
-MySQL-python==1.2.4
-PyYAML==3.10
-#Reaper==1.0
-Reindent==0.1.1
-SQLAlchemy==0.8.0
-WTForms==1.0.3
-Werkzeug==0.8.3
-apache-libcloud==0.12.3
-argparse==1.2.1
-blinker==1.2
-cairosvg==1.0.15
-itsdangerous==0.17
-logging-tree==1.2
-logilab-astng==0.24.3
-logilab-common==0.59.1
-#numarray==1.5.2
-numpy==1.7.0
-passlib==1.6.1
-pp==1.6.3
-pylint==0.27.0
-redis==2.7.2
-requests==1.1.0
-scipy==0.11.0
-simplejson==3.0.7
-wsgiref==0.1.2
-yolk==0.4.3
-XlsxWriter==0.7.2
diff --git a/doc/todo.txt b/doc/todo.txt
deleted file mode 100644
index 1d781b13..00000000
--- a/doc/todo.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-- Ask Rob about potentially recoding qtlreaper
-- Ask Rob about Probe/cellid traits
\ No newline at end of file