From 0589a3b87161280db2eab5fc95bde50ba7338d22 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 28 Jun 2016 11:02:38 +0000
Subject: Doc: Adding info on SQL use

---
 doc/database.org | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 156 insertions(+), 3 deletions(-)

diff --git a/doc/database.org b/doc/database.org
index d4c04848..df34e7ed 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -207,10 +207,18 @@ Metadata
 
 ?
 
-** Chr_Length
+** Chr_Length (/cross/BXD.json)
 
 Default mm9, column for mm8
 
+select * from Chr_Length;
+
+| Name | SpeciesId | OrderId | Length    | Length_mm8 |
+| 1    |         1 |       1 | 197195432 |  197069962 |
+| 2    |         1 |       2 | 181748087 |  181976762 |
+
+Table should be merged with
+
 ** Dataset_mbat
 
 Menu for BXD (linkouts)
@@ -275,10 +283,19 @@ Wiki info (nightly updated from NCBI)
 
 XRef should be foreign keys
 
-** Geno
+** Geno (genotype/marker/'marker'.json)
 
 SNP or marker info
 
+INFO:base.trait:.sql: retrieve_info:
+                                select Geno.Chr, Geno.Mb from Geno, Species
+                                where Species.Name = 'mouse' and
+                                Geno.Name = 'rs3693478' and
+                                Geno.SpeciesId = Species.Id
+
+| Id | SpeciesId | Name       | Marker_Name | Chr  | Mb        | Sequence                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              | Source  | chr_num | Source2 | Comments | used_by_geno_file | Mb_mm8    | Chr_mm8 |
+|  1 |         1 | 01.001.695 | 01.001.695  | 1    |  4.678288 | GCCCTGCCCACCTCAGAGCAAGCTGCCACCCAGGAGTCCGTGTTTCAGGAGATGTGTGAGGAGGGCCTGCTGGAGGAGTGTGATGGTGAGGATGAGGCAGGCCGTGCCGCG[T/C]AGCCAGAGGCTGGTGATGGGACCACCGAGATCTCACCCACTGGTGCTGCTGATCCTGAGAAGAGGATGGAGAAGAAGACGGAGCAGCAGCACACCGGCGGCGGGAGAAAGCTGCTCGTAAGCTGCTCGTAAGCTACGGGTGCAGCAGGCTGCACTTAGGGCAGCCCGGCTTCAGCACCAAGAACTCTTCAGGCTGCATGGGATCAAGGCCCAGGTGGCCCGAAGGCTGGCAGAACTCGCACACGGGAGGGAGCAGCAGCGCATACAGCGACTGGCAGAGGCTGACAAGCCCCGAAGGCTGGGACGACTCAAGTACCAGGCTCCTGACATTGATGTGCAGCTCAGCTCTGAGCTGTCTGGCCCACTCAGGACACTGAAACCAGAAGGTCACATTCTCCAAGACAGGTTCAAGAGCTTCCAGAAGAGAAATATGATTGAGCCCCGAGAACGAGCCAAGTTCAAGCGCAAATAAAAAATGAAGTTGGTGGAGAAGCGGGCCTACCATGAGATTCAGTTGTAGCTGTGCAGATGTCGGAGCCCCGCCCCTCAATAAAGTTCTGTGACAAAAAAAAAAAAAAAAAAAGAAGAAGAAGAAGAAAAGGAAAAAAAAGAAGAAAAAGAAAAAAAAAGAAAAAAGAAAAAGAAAACACATCACTTGGCAAAACTCCATAGACTCTATGTGATTCATGTTTCAAACATGCACCTA | GNF_SNP |       1 | GNF     | NULL     | NULL              |  4.678288 | 1       |
+
 ** GenoCode
 
 Belongs to someone else
@@ -311,7 +328,7 @@ Heritability for probeset(?)
 
 Homology, not used much
 
-** InbredSet
+** InbredSet (/cross/BXD.info)
 
 Group in menu
 
@@ -490,6 +507,36 @@ select count(*) from ProbeSet limit 5;
 2 rows in set (0.00 sec)
 
 
+** ProbeSetXRef (phenotypes/dataset_name.json)
+
+For every probe set (read dataset measuring point):
+
+select * from ProbeSetXRef;
+
+| ProbeSetFreezeId | ProbeSetId | DataId   | Locus_old | LRS_old | pValue_old | mean             | se   | Locus      | LRS               | pValue | additive              | h2   |
+|              112 |     123528 | 23439389 | NULL      |    NULL |       NULL |  6.7460707070707 | NULL | rs6239372  |  10.9675593568894 |  0.567 |    0.0448545966228878 | NULL |
+|              112 |     123527 | 23439388 | NULL      |    NULL |       NULL | 6.19416161616162 | NULL | rs13476936 |  10.9075670392762 |  0.567 |   -0.0358456732993988 | NULL |
+
+where ProbeSetFreezeId is the dataset (experiment). ProbeSetId refers
+to the probe set information (measuring point). DataId points to the
+data point. The other values are used for search.
+
+It is used in search thus:
+
+SELECT distinct ProbeSet.Name as TNAME,
+  ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS,
+  ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM,
+  ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL,
+  ProbeSet.name_num as TNAME_NUM
+FROM ProbeSetXRef, ProbeSet
+WHERE ProbeSet.Id = ProbeSetXRef.ProbeSetId
+  and ProbeSetXRef.ProbeSetFreezeId = 112
+  ORDER BY ProbeSet.symbol ASC limit 5;
+
+| TNAME      | TMEAN            | TLRS               | TPVALUE               | TCHR_NUM | TMB        | TSYMBOL       | TNAME_NUM |
+| 1445618_at | 7.05679797979798 |   13.5417452764616 |                  0.17 |        8 |  75.077895 | NULL          |   1445618 |
+| 1452452_at |            7.232 |   30.4944361132252 | 0.0000609756097560421 |       12 |    12.6694 | NULL          |   1452452 |
+
 ** ProbeSetData
 
 Probedata - main molecular data. Probesets, metabolome,
@@ -691,6 +738,19 @@ show indexes from ProbeSetFreeze;
 | ProbeSetFreeze |          1 | NameIndex |            1 | Name2       | A         |           2 |     NULL | NULL   |      | BTREE      |         |               |
 +----------------+------------+-----------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+
 
+** ProbeSetSE
+
+ select * from ProbeSetSE limit 5;
++--------+----------+----------+
+| DataId | StrainId | error    |
++--------+----------+----------+
+|      1 |        1 | 0.681091 |
+|      1 |        2 | 0.361151 |
+|      1 |        3 | 0.364342 |
+|      1 |        4 | 0.827588 |
+|      1 |        5 | 0.303492 |
++--------+----------+----------+
+
 ** Publication and publishdata (all pheno)
 
 Phenotype pubs
@@ -794,6 +854,18 @@ INFO:db.call:.sql: __init__:
 
 INFO:db.call:.sql: ('BXD', 1)
 
+The actual search is
+
+SELECT distinct ProbeSet.Name as TNAME, 0 as thistable,
+  ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS,
+  ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM,
+  ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL,
+  ProbeSet.name_num as TNAME_NUM
+FROM ProbeSetXRef, ProbeSet
+WHERE ProbeSet.Id = ProbeSetXRef.ProbeSetId
+  and ProbeSetXRef.ProbeSetFreezeId = 112
+  ORDER BY ProbeSet.symbol ASC limit 5;
+
 INFO:base.species:.sql: __init__:
                 Select
                         Chr_Length.Name, Chr_Length.OrderId, Length from Chr_Length, InbredSet
@@ -876,6 +948,87 @@ INFO:base.data_set:.sql: get_trait_info:
 
 ** Fetch phenotypes
 
+*** Through the trait page
+
+When hitting the trait page, e.g.
+
+curl "http://localhost:5003/show_trait?trait_id=1443823_s_at&dataset=HC_M2_0606_P"
+
+First the BXD's are queried with
+
+DEBUG:base.data_set:.get_samplelist: Sample list: : ['BXD1',
+ 'BXD2',
+ 'BXD5',
+ ...
+
+main probeset info (trait) is retrieved with
+
+SELECT ProbeSet.name, ProbeSet.symbol, ProbeSet.description, ProbeSet.probe_target_description, ProbeSet.chr, ProbeSet.mb, ProbeSet.alias, ProbeSet.geneid, ProbeSet.genbankid, ProbeSet.unigeneid, ProbeSet.omim, ProbeSet.refseq_transcriptid, ProbeSet.blatseq, ProbeSet.targetseq, ProbeSet.chipid, ProbeSet.comments, ProbeSet.strand_probe, ProbeSet.strand_gene, ProbeSet.probe_set_target_region, ProbeSet.probe_set_specificity, ProbeSet.probe_set_blat_score, ProbeSet.probe_set_blat_mb_start, ProbeSet.probe_set_blat_mb_end, ProbeSet.probe_set_strand, ProbeSet.probe_set_note_by_rw, ProbeSet.flag
+                    FROM ProbeSet, ProbeSetFreeze, ProbeSetXRef
+                    WHERE
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetFreeze.Name = 'HC_M2_0606_P' AND
+                            ProbeSet.Name = '1443823_s_at'
+
+Followed by
+
+INFO:base.trait:.sql: retrieve_info:
+   SELECT
+                                    ProbeSetXRef.Locus, ProbeSetXRef.LRS, ProbeSetXRef.pValue, ProbeSetXRef.mean, ProbeSetXRef.additive
+                            FROM
+                                    ProbeSetXRef, ProbeSet
+                            WHERE
+                                    ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                                    ProbeSet.Name = "1443823_s_at" AND
+                                    ProbeSetXRef.ProbeSetFreezeId =112
+
+| Locus       | LRS             | pValue                 | mean             | additive             |
+| NES13033186 | 35.466324074542 | 0.00000900000000003676 | 15.0551313131313 | -0.16750405405405402 |
+
+Then the interesting bit, the sample data is fetched with
+
+INFO:base.data_set:.sql: retrieve_sample_data:
+                   SELECT
+                            Strain.Name, ProbeSetData.value, ProbeSetSE.error, ProbeSetData.Id, Strain.Name2
+                    FROM
+                            (ProbeSetData, ProbeSetFreeze, Strain, ProbeSet, ProbeSetXRef)
+                    left join ProbeSetSE on
+                            (ProbeSetSE.DataId = ProbeSetData.Id AND ProbeSetSE.StrainId = ProbeSetData.StrainId)
+                    WHERE
+                            ProbeSet.Name = '1443823_s_at' AND ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
+                            ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
+                            ProbeSetFreeze.Name = 'HC_M2_0606_P' AND
+                            ProbeSetXRef.DataId = ProbeSetData.Id AND
+                            ProbeSetData.StrainId = Strain.Id
+                    Order BY
+                            Strain.Name
+
+| Name        | value  | error | Id       | Name2       |
+| 129S1/SvImJ | 14.552 |  NULL | 23422417 | 129S1/SvImJ |
+| A/J         |  14.34 |  NULL | 23422417 | A/J         |
+| AKR/J       | 14.338 |  NULL | 23422417 | AKR/J       |
+| B6D2F1      | 15.251 |  NULL | 23422417 | B6D2F1      |
+| BALB/cByJ   | 14.164 |  NULL | 23422417 | BALB/cByJ   |
+| BALB/cJ     | 14.563 |  NULL | 23422417 | BALB/cJ     |
+| BXD1        | 15.198 |  NULL | 23422417 | BXD1        |
+| BXD11       | 15.084 |  NULL | 23422417 | BXD11       |
+| BXD12       | 15.192 |  NULL | 23422417 | BXD12       |
+ etc.
+
+Then some repeated queries, and this one fetches the nearest SNP:
+
+INFO:wqflask.show_trait.show_trait:.sql: get_nearest_marker:
+ SELECT Geno.Name FROM Geno, GenoXRef, GenoFreeze WHERE Geno.Chr = '1'
+ AND GenoXRef.GenoId = Geno.Id AND GenoFreeze.Id =
+ GenoXRef.GenoFreezeId AND GenoFreeze.Name = 'BXDGeno' ORDER BY ABS(
+ Geno.Mb - 173.149434) limit 1;
+
+| Name        |
+| NES13033186 |
+
+*** Digging deeper
+
 To get at phenotype data ProbeSetData is the main table (almost all
 important molecular assay data is in this table including probe set
 data, RNA-seq data, proteomic data, and metabolomic data. 2.5 billion
-- 
cgit 1.4.1


From 537e1d19d33f1000103f3f33b52e87674ca89a31 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 28 Jun 2016 11:03:06 +0000
Subject: Logger: disabled some noisy loggers

---
 wqflask/wqflask/show_trait/SampleList.py | 4 ++--
 wqflask/wqflask/show_trait/show_trait.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/wqflask/wqflask/show_trait/SampleList.py b/wqflask/wqflask/show_trait/SampleList.py
index 5e3b092e..7e7503d4 100644
--- a/wqflask/wqflask/show_trait/SampleList.py
+++ b/wqflask/wqflask/show_trait/SampleList.py
@@ -32,7 +32,7 @@ class SampleList(object):
         self.sample_attribute_values = {}
 
         self.get_attributes()
-        logger.debug("camera: attributes are:", pf(self.attributes))
+        # logger.debug("camera: attributes are:", pf(self.attributes))
 
         if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet':
             self.get_extra_attribute_values()
@@ -55,7 +55,7 @@ class SampleList(object):
                 sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
                 sample.extra_info['css_class'] = "fs12"
 
-            logger.debug("  type of sample:", type(sample))
+            # logger.debug("  type of sample:", type(sample))
 
             if sample_group_type == 'primary':
                 sample.this_id = "Primary_" + str(counter)
diff --git a/wqflask/wqflask/show_trait/show_trait.py b/wqflask/wqflask/show_trait/show_trait.py
index 3eea3f4a..912beabe 100644
--- a/wqflask/wqflask/show_trait/show_trait.py
+++ b/wqflask/wqflask/show_trait/show_trait.py
@@ -146,7 +146,7 @@ class ShowTrait(object):
         else:
             self.sample_group_types['samples_primary'] = self.dataset.group.name
         sample_lists = [group.sample_list for group in self.sample_groups]
-        logger.debug("sample_lists is:", pf(sample_lists))
+        # logger.debug("sample_lists is:", pf(sample_lists))
 
         self.get_mapping_methods()
 
-- 
cgit 1.4.1


From fb6fcc3cd296b4b626650565119acbdd0188dc2d Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 8 Jul 2016 11:07:11 +0200
Subject: Docs: genotyping

---
 doc/Architecture.org | 60 +++++++++++++++++++++++++++++++++++++++++++---------
 doc/database.org     |  1 -
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/doc/Architecture.org b/doc/Architecture.org
index ed19889c..9a561568 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -1,19 +1,19 @@
-* GeneNetwork Architecture
-
 #+TITLE: Installing GeneNetwork services
 
 * Table of Contents                                                     :TOC:
- - [[#genenetwork-architecture][GeneNetwork Architecture]]
-   - [[#introduction][Introduction]]
-   - [[#webserver][Webserver]]
-   - [[#gnserver-rest][GnServer (REST)]]
+ - [[#introduction][Introduction]]
+ - [[#webserver][Webserver]]
+ - [[#gnserver-rest][GnServer (REST)]]
+ - [[#database-][Database ]]
+   - [[#phenotypes][Phenotypes]]
+   - [[#genotypes][Genotypes]]
 
-** Introduction
+* Introduction
 
 This document describes the architecture of GN2. Because GN2 is
 evolving, only a high-level overview is given here.
 
-** Webserver
+* Webserver
 
 The main [[https://github.com/genenetwork/genenetwork2][GN2 webserver]] is built on [[http://flask.pocoo.org/][Python flask]] and this GN2 source
 code can be found on [[https://github.com/genenetwork/genenetwork2/tree/master/wqflask/wqflask][github]] in the wqflask directory. The routing
@@ -44,7 +44,7 @@ Consortium M430v2 (Jun06) PDNN to find all records with MEAN between
 15 and 16 and with LRS between 23 and 46.'. Then the results are added
 to a table which is displayed using a JS [[https://datatables.net/][DataTable container]].
 
-** GnServer (REST)
+* GnServer (REST)
 
 The [[https://github.com/genenetwork/gn_server][GnServer REST API]] is built on high performance [[http://elixir-lang.org/][Elixir]] with [[https://github.com/falood/maru][Maru]].
 Mainly the GnServer serves JSON requests, for example to fetch data
@@ -52,4 +52,44 @@ from the database. To get the menu data in YAML you can do something like
 
 : curl localhost:8880/int/menu/main.json|ruby extra/json2yaml.rb
 
-(json2yaml.rb is in the gn_server repo).
+(json2yaml.rb is in the gn_server repo). For the current API definition
+see [[https://github.com/genenetwork/gn_server/doc/API.md][GnServer REST API]] documentation.
+
+* Database 
+
+** Phenotypes
+
+Phenotypes are stored in the SQL database.  For what happens at the
+database level see [[database.org]]. A test database can be downloaded -
+see the installation [[./README.org][instructions]].
+
+** Genotypes
+
+Genotypes are stored in genotype files. These are part of the GNU Guix
+distribution, see the installation [[./README.org][instructions]]. Genotype files are
+currently in GN1 format, and will be aligned with the R/qtl2 format. Now it is
+for BXD.geno:
+
+#+begin_src js
+@name:BXD
+@type:riset
+@mat:B
+@pat:D
+@het:H
+@unk:U
+Chr Locus cM  Mb  BXD1  BXD2  BXD5  BXD6  BXD8  BXD9  BXD11 BXD12 BXD13 BXD14 BX
+D15 BXD16 BXD18 BXD19 BXD20 BXD21 BXD22 BXD23 BXD24a  BXD24 BXD25 BXD27 BXD28 BX
+D29 BXD30 BXD31 BXD32 BXD33 BXD34 BXD35 BXD36 BXD37 BXD38 BXD39 BXD40 BXD41 BXD4
+2 BXD43 BXD44 BXD45 BXD48 BXD49 BXD50 BXD51 BXD52 BXD53 BXD54 BXD55 BXD56 BXD59
+BXD60 BXD61 BXD62 BXD63 BXD64 BXD65 BXD66 BXD67 BXD68 BXD69 BXD70 BXD71 BXD72 BX
+D73 BXD74 BXD75 BXD76 BXD77 BXD78 BXD79 BXD80 BXD81 BXD83 BXD84 BXD85 BXD86 BXD8
+7 BXD88 BXD89 BXD90 BXD91 BXD92 BXD93 BXD94 BXD95 BXD96 BXD97 BXD98 BXD99 BXD100
+  BXD101  BXD102  BXD103
+1 rs6269442 0.0 3.482275  B B D D D B B D B B D D B D D D D B B B D B D D B B B
+B B B B B B D B D B B D B B H H B D B B H H B B D D D D D B B H B B B B D B D B
+D D D D D H B D D B D B B D D B D D B B B B B B B D
+1 rs6365999 0.0 4.811062  B B D D D B B D B B D D B D D D D B B B D B D D B B B
+B B B B B B D B D B B D B B H H B D B B H H B B D D D D D B B H B B B B D B D B
+D D D D D H B D D B D B B D D B D D B B B B B B U D
+etc
+#+end_src
diff --git a/doc/database.org b/doc/database.org
index df34e7ed..2c7dc887 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -506,7 +506,6 @@ select count(*) from ProbeSet limit 5;
 +------+--------+----------+----------+--------+----------------------------------------------+------+-----------+----------+--------+-----------+------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------+--------------+-------------+--------+----------+-------------------------+-----------------------+----------------------+-------------------------+-----------------------+------------------+----------------------+------+----------+---------------+--------------+------+---------+----------+---------+----------+--------------------------+---------------------+---------+-----------+-----------------------------+---------------------------+--------------+-------------+-----------+-------------+------------+---------+------------+----------+-----------+------------+------------+---------------+---------+-----------+---------+------------------+-------------+------+--------+-------------+----------------+-----------------+
 2 rows in set (0.00 sec)
 
-
 ** ProbeSetXRef (phenotypes/dataset_name.json)
 
 For every probe set (read dataset measuring point):
-- 
cgit 1.4.1


From 0e975e62810c1b8afac13e1fa039a3998fc58ba9 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 16 Jul 2016 12:00:55 +0600
Subject: README: added info

---
 doc/Architecture.org            | 5 +++--
 wqflask/maintenance/README.md   | 4 ++++
 wqflask/maintenance/__init__.py | 0
 3 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 wqflask/maintenance/README.md
 delete mode 100644 wqflask/maintenance/__init__.py

diff --git a/doc/Architecture.org b/doc/Architecture.org
index 9a561568..cac40939 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -56,7 +56,6 @@ from the database. To get the menu data in YAML you can do something like
 see [[https://github.com/genenetwork/gn_server/doc/API.md][GnServer REST API]] documentation.
 
 * Database 
-
 ** Phenotypes
 
 Phenotypes are stored in the SQL database.  For what happens at the
@@ -67,7 +66,7 @@ see the installation [[./README.org][instructions]].
 
 Genotypes are stored in genotype files. These are part of the GNU Guix
 distribution, see the installation [[./README.org][instructions]]. Genotype files are
-currently in GN1 format, and will be aligned with the R/qtl2 format. Now it is
+currently in GN1 format, and will be aligned with the [[http://kbroman.org/qtl2/pages/sampledata.html][R/qtl2 formats]]. Now it is
 for BXD.geno:
 
 #+begin_src js
@@ -93,3 +92,5 @@ B B B B B B D B D B B D B B H H B D B B H H B B D D D D D B B H B B B B D B D B
 D D D D D H B D D B D B B D D B D D B B B B B B U D
 etc
 #+end_src
+
+and, for example, in the method run_rqtl_geno this file gets loaded.
diff --git a/wqflask/maintenance/README.md b/wqflask/maintenance/README.md
new file mode 100644
index 00000000..873eaa32
--- /dev/null
+++ b/wqflask/maintenance/README.md
@@ -0,0 +1,4 @@
+Maintenance files have been moved into a separate repository named
+*gn_extra*. See https://github.com/genenetwork/gn_extra
+
+
diff --git a/wqflask/maintenance/__init__.py b/wqflask/maintenance/__init__.py
deleted file mode 100644
index e69de29b..00000000
-- 
cgit 1.4.1


From b6ce5fa5097b642d9d0f8e9ced614ee20e8ad58c Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 16 Jul 2016 13:19:27 +0600
Subject: R/qtl: geno format

---
 doc/Architecture.org | 33 +++++++++++++++++++++++++++------
 doc/database.org     |  1 -
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/doc/Architecture.org b/doc/Architecture.org
index cac40939..fe3eae39 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -4,7 +4,7 @@
  - [[#introduction][Introduction]]
  - [[#webserver][Webserver]]
  - [[#gnserver-rest][GnServer (REST)]]
- - [[#database-][Database ]]
+ - [[#database][Database]]
    - [[#phenotypes][Phenotypes]]
    - [[#genotypes][Genotypes]]
 
@@ -55,7 +55,7 @@ from the database. To get the menu data in YAML you can do something like
 (json2yaml.rb is in the gn_server repo). For the current API definition
 see [[https://github.com/genenetwork/gn_server/doc/API.md][GnServer REST API]] documentation.
 
-* Database 
+* Database
 ** Phenotypes
 
 Phenotypes are stored in the SQL database.  For what happens at the
@@ -66,8 +66,8 @@ see the installation [[./README.org][instructions]].
 
 Genotypes are stored in genotype files. These are part of the GNU Guix
 distribution, see the installation [[./README.org][instructions]]. Genotype files are
-currently in GN1 format, and will be aligned with the [[http://kbroman.org/qtl2/pages/sampledata.html][R/qtl2 formats]]. Now it is
-for BXD.geno:
+currently in GN1 format, and will be aligned with the [[http://kbroman.org/qtl2/pages/sampledata.html][R/qtl2
+formats]]. Currently it is for the stored file BXD.geno:
 
 #+begin_src js
 @name:BXD
@@ -90,7 +90,28 @@ D D D D D H B D D B D B B D D B D D B B B B B B B D
 1 rs6365999 0.0 4.811062  B B D D D B B D B B D D B D D D D B B B D B D D B B B
 B B B B B B D B D B B D B B H H B D B B H H B B D D D D D B B H B B B B D B D B
 D D D D D H B D D B D B B D D B D D B B B B B B U D
-etc
+...
 #+end_src
 
-and, for example, in the method run_rqtl_geno this file gets loaded.
+and, for example, in the method run_rqtl_geno this file gets
+loaded. For GnServer, however, we only want to deal with standardized
+R/qtl formatted data, so with gn_extra we convert the original format
+into R/qtl format with geno2rqtl with one adaptation: the geno table
+is transposed so now becomes
+
+#+begin_src js
+id,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103
+1,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,B,D
+2,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
+3,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,D,D,B,B,H,H,B,B,B,B,H,H,B,B,D,D,D,D,B,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
+...
+#+end_src js
+
+i.e. individuals are columns and markers are rows. Alternatively it could look like
+
+#+begin_src js
+id,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103
+rs6269442,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,B,D
+rs6365999,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
+rs6376963,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,D,D,B,B,H,H,B,B,B,B,H,H,B,B,D,D,D,D,B,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
+#+end_src js
diff --git a/doc/database.org b/doc/database.org
index 2c7dc887..9c8c89f2 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -946,7 +946,6 @@ INFO:base.data_set:.sql: get_trait_info:
 (that is a bug!).
 
 ** Fetch phenotypes
-
 *** Through the trait page
 
 When hitting the trait page, e.g.
-- 
cgit 1.4.1


From 56636bf3fabedd3fb581ddc4a95fcf3e586fd607 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sun, 17 Jul 2016 11:37:27 +0600
Subject: Doc: genotype data

---
 doc/Architecture.org | 50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/doc/Architecture.org b/doc/Architecture.org
index fe3eae39..e3bc8026 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -100,7 +100,7 @@ into R/qtl format with geno2rqtl with one adaptation: the geno table
 is transposed so now becomes
 
 #+begin_src js
-id,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103
+marker,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103
 1,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,B,D
 2,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
 3,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,D,D,B,B,H,H,B,B,B,B,H,H,B,B,D,D,D,D,B,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
@@ -110,8 +110,54 @@ id,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19
 i.e. individuals are columns and markers are rows. Alternatively it could look like
 
 #+begin_src js
-id,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103
+marker,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103
 rs6269442,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,B,D
 rs6365999,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
 rs6376963,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,D,D,B,B,H,H,B,B,B,B,H,H,B,B,D,D,D,D,B,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D
 #+end_src js
+
+This is also the format provided by R/qtl in
+https://github.com/rqtl/qtl2data/tree/master/DO_Recla which we will
+use as the base line for the REST server. In the meta json file the
+genotype data is tagged as transposed:
+
+#+begin_src js
+{
+"description": "DO data from Recla et al. (2014) Mamm Genome 25:211-222",
+"crosstype": "do",
+"geno": "recla_geno.csv",
+"geno_transposed": true,
+"founder_geno": "recla_foundergeno.csv",
+"founder_geno_transposed": true,
+"genotypes": {
+"1": "1",
+"2": "2",
+"3": "3"
+},
+"pheno": "recla_pheno.csv",
+"pheno_transposed": false,
+"covar": "recla_covar.csv",
+"sex": {
+"covar": "Sex",
+"female": "female",
+"male": "male"
+},
+"x_chr": "X",
+"cross_info": {
+"covar": "ngen"
+},
+"gmap": "recla_gmap.csv",
+"pmap": "recla_pmap.csv",
+"alleles": ["A", "B", "C", "D", "E", "F", "G", "H"]
+}
+#+end_src
+
+Meanwhile the gmap file looks like
+
+#+begin_src js
+marker,chr,pos,Mb
+rs6269442,1,0.0,3.482275
+rs6365999,1,0.0,4.811062
+rs6376963,1,0.895,5.008089
+rs3677817,1,1.185,5.176058
+#+end_src
-- 
cgit 1.4.1


From 48af807b4d8eabb0fac53451333fe7e85d38855b Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Thu, 21 Jul 2016 12:55:23 +0600
Subject: Doc: SNP handling

---
 doc/database.org | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/doc/database.org b/doc/database.org
index 9c8c89f2..624174a4 100644
--- a/doc/database.org
+++ b/doc/database.org
@@ -945,7 +945,7 @@ INFO:base.data_set:.sql: get_trait_info:
 
 (that is a bug!).
 
-** Fetch phenotypes
+** Fetch phenotype information
 *** Through the trait page
 
 When hitting the trait page, e.g.
@@ -1165,3 +1165,51 @@ select * from ProbeSetData limit 5;
 5 rows in set (0.00 sec)
 
 linked by ProbeSetXRef.dataid.
+** Fetch genotype information
+
+*** SNPs
+
+The SNP count info for the BXD is calculated like this
+
+#+begin_src python
+        while startMb<endMb:
+            snp_count = g.db.execute("""
+                    select
+                            count(*) from BXDSnpPosition
+                    where
+                            Chr = '%s' AND Mb >= %2.6f AND Mb < %2.6f AND
+                            StrainId1 = %d AND StrainId2 = %d
+                    """ % (chrName, startMb, startMb+stepMb, strainId1, strainId2)).fetchone()[0]
+            SNPCounts.append(snp_count)
+            startMb += stepMb
+#+end_src
+
+select * from BXDSnpPosition limit 5;
++------+-----------+-----------+----------+
+| Chr  | StrainId1 | StrainId2 | Mb       |
++------+-----------+-----------+----------+
+| 1    |         2 |         3 | 0.002477 |
+| 1    |         2 |         3 | 0.002592 |
+| 1    |         2 |         3 |  0.00283 |
+| 1    |         2 |         3 | 0.002994 |
+| 1    |         2 |         3 | 0.003299 |
++------+-----------+-----------+----------+
+
+Other SNP tables are empty in this (test) database:
+
+select * from SnpSource limit 5;
+Empty set (0.00 sec)
+
+select * from SnpAll limit 5;
+Empty set (0.00 sec)
+
+mysql> select * from SnpAll limit 5;
+Empty set (0.00 sec)
+
+mysql> select * from SnpPattern limit 5;
+Empty set (0.00 sec)
+
+mysql> select * from SnpSource limit 5;
+Empty set (0.00 sec)
+
+Hmmm. This is the test database. Then there are the plink files and VCF files.
-- 
cgit 1.4.1


From 41a8f2381b0d2d2f7d6aa5d50dee93d96af3ff39 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Tue, 26 Jul 2016 10:49:46 +0600
Subject: Docs: genotype

---
 doc/Architecture.org                                  | 19 ++++++++++---------
 .../marker_regression/marker_regression_gn1.py        |  2 +-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/doc/Architecture.org b/doc/Architecture.org
index e3bc8026..a1fcbef2 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -66,8 +66,9 @@ see the installation [[./README.org][instructions]].
 
 Genotypes are stored in genotype files. These are part of the GNU Guix
 distribution, see the installation [[./README.org][instructions]]. Genotype files are
-currently in GN1 format, and will be aligned with the [[http://kbroman.org/qtl2/pages/sampledata.html][R/qtl2
-formats]]. Currently it is for the stored file BXD.geno:
+currently in GN1 format, and will be aligned with the [[http://kbroman.org/qtl2/pages/sampledata.html][R/qtl2 formats]].
+
+The GN1-style format (still the default in GN2) for the stored file BXD.geno looks like:
 
 #+begin_src js
 @name:BXD
@@ -130,21 +131,21 @@ genotype data is tagged as transposed:
 "founder_geno": "recla_foundergeno.csv",
 "founder_geno_transposed": true,
 "genotypes": {
-"1": "1",
-"2": "2",
-"3": "3"
+  "1": "1",
+  "2": "2",
+  "3": "3"
 },
 "pheno": "recla_pheno.csv",
 "pheno_transposed": false,
 "covar": "recla_covar.csv",
 "sex": {
-"covar": "Sex",
-"female": "female",
-"male": "male"
+  "covar": "Sex",
+  "female": "female",
+  "male": "male"
 },
 "x_chr": "X",
 "cross_info": {
-"covar": "ngen"
+  "covar": "ngen"
 },
 "gmap": "recla_gmap.csv",
 "pmap": "recla_pmap.csv",
diff --git a/wqflask/wqflask/marker_regression/marker_regression_gn1.py b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
index 9573a9de..33ebc527 100644
--- a/wqflask/wqflask/marker_regression/marker_regression_gn1.py
+++ b/wqflask/wqflask/marker_regression/marker_regression_gn1.py
@@ -382,7 +382,7 @@ class MarkerRegression(object):
             self.GraphInterval = self.MbGraphInterval #Mb
         else:
             self.GraphInterval = self.cMGraphInterval #cM
-			
+
         ################################################################
         # Get Trait Values and Infomation
         ################################################################
-- 
cgit 1.4.1


From bad44474c64bb65edc13ed39d639117ab558ce15 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Mon, 8 Aug 2016 09:20:44 +0000
Subject: Doc: gn_exec

---
 doc/Architecture.org | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/Architecture.org b/doc/Architecture.org
index a1fcbef2..04e05e40 100644
--- a/doc/Architecture.org
+++ b/doc/Architecture.org
@@ -4,6 +4,7 @@
  - [[#introduction][Introduction]]
  - [[#webserver][Webserver]]
  - [[#gnserver-rest][GnServer (REST)]]
+ - [[#gnexec][GnExec]]
  - [[#database][Database]]
    - [[#phenotypes][Phenotypes]]
    - [[#genotypes][Genotypes]]
@@ -55,6 +56,11 @@ from the database. To get the menu data in YAML you can do something like
 (json2yaml.rb is in the gn_server repo). For the current API definition
 see [[https://github.com/genenetwork/gn_server/doc/API.md][GnServer REST API]] documentation.
 
+* GnExec
+
+GnExec, also written in Elixir, executes commands using a separate
+daemon.
+
 * Database
 ** Phenotypes
 
-- 
cgit 1.4.1


From 7cc37bf2efba6873fccd0f1756c89d25400afd47 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 9 Sep 2016 08:34:36 +0200
Subject: Doc: note on guix paths

---
 doc/README.org | 46 ++++++++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 20 deletions(-)

diff --git a/doc/README.org b/doc/README.org
index b3c78f29..aa05654f 100644
--- a/doc/README.org
+++ b/doc/README.org
@@ -117,7 +117,7 @@ cd guix-gn-latest
 ** Step 3: Authorize the GN Guix server
 
 GN2 has its own GNU Guix binary distribution server. To trust it you have
-to add the following key 
+to add the following key
 
 #+begin_src scheme
 (public-key
@@ -136,9 +136,9 @@ guix archive --authorize
 
 and hit Ctrl-D.
 
-Now you can use the substitute server to install GN2 binaries. 
+Now you can use the substitute server to install GN2 binaries.
 
-** Step 4: Install and run GN2 
+** Step 4: Install and run GN2
 
 Since this is a quick and dirty install we are going to override the
 GNU Guix package path by pointing the package path to our repository:
@@ -208,7 +208,7 @@ https://s3.amazonaws.com/genenetwork2/db_webqtl_s.zip
 Check the md5sum.
 
 After installation inflate the database binary in the MySQL directory
-(this installation path is subject to change soon) 
+(this installation path is subject to change soon)
 
 : chown -R mysql:mysql db_webqtl_s/
 : chmod 700 db_webqtl_s/
@@ -271,10 +271,10 @@ R_LIBS_SITE are set) from the information given by guix:
 Inside the repository:
 
 : cd genenetwork2
-: ./bin/genenetwork2 
+: ./bin/genenetwork2
 
-Will fire up your local repo http://localhost:5003/ using the  
-settings in ./etc/default_settings.py. These settings may 
+Will fire up your local repo http://localhost:5003/ using the
+settings in ./etc/default_settings.py. These settings may
 not reflect your system. To override settings create your own from a copy of
 default_settings.py and pass it into GN2 with
 
@@ -348,7 +348,7 @@ Make dirs
 
 Add users
 
-: adduser nobody ; addgroup nobody 
+: adduser nobody ; addgroup nobody
 
 Run nginx
 
@@ -392,6 +392,12 @@ Make a note of the paths with
 ./pre-inst-env guix package --search-paths
 #+end_src bash
 
+or, if guix is already installed, this should also work
+
+#+begin_src bash
+guix package --search-paths
+#+end_src bash
+
 After setting the paths for the server
 
 #+begin_src bash
@@ -413,7 +419,7 @@ genenetwork2
 will start the default server which listens on port 5003, i.e.,
 http://localhost:5003/.
 
-OK, we are where we were before with step 4. Only difference is that we 
+OK, we are where we were before with step 4. Only difference is that we
 used our own compiled guix server.
 
 * Trouble shooting
@@ -433,7 +439,7 @@ On one system:
 : export R_LIBS_SITE="$HOME/.guix-profile/site-library/"
 : export GEM_PATH="$HOME/.guix-profile/lib/ruby/gems/2.2.0"
 
-and perhaps a few more. 
+and perhaps a few more.
 ** ERROR: can not find directory $HOME/gn2_data
 
 The default settings file looks in your $HOME/gn2_data. Since these
@@ -466,7 +472,7 @@ and a download of the test database.
 <user01> set to the ones in ~/.guix-profile/
 <pjotrp> good, and you are in gn-latest-guix repo  [07:06]
 <user01> yep  [07:07]
-<pjotrp> git log shows 
+<pjotrp> git log shows
 
 Author: David Thompson <dthompson2@worcester.edu>
 Date:   Sun Mar 27 21:20:19 2016 -0400
@@ -488,7 +494,7 @@ genenetwork2-files-small        1.0     out ../guix-bioinformatics/gn/packages/g
 <user01> hah, I don't have screen installed yet  [07:11]
 <pjotrp> comes with guix ;)  [07:12]
 <pjotrp> no worries, you can run it any way you want
-<pjotrp> $HOME/.guix-profile/bin/guix-daemon --build-users-group=guixbuild 
+<pjotrp> $HOME/.guix-profile/bin/guix-daemon --build-users-group=guixbuild
 <user01> then something's weird, because it says I don't have it
 <pjotrp> oh, you need to install it first  [07:13]
 <pjotrp> guix package -A screen
@@ -546,11 +552,11 @@ The following derivations would be built:
 <pjotrp> https://github.com/pjotrp/guix-notes/blob/master/REPRODUCIBLE.org
 <pjotrp> this is exactly what we are doing now
 <user01> alrighty  [07:35]
-<pjotrp> To see if a remote server has a guix server running it should respond 
+<pjotrp> To see if a remote server has a guix server running it should respond
                                                                         [07:36]
 <pjotrp> lynx http://guix.genenetwork.org:8080 --dump
 <pjotrp> Resource not found: /
-<pjotrp> 
+<pjotrp>
 <pjotrp> you see that?
 <user01> yes  [07:37]
 <pjotrp> good. The main hydra server is too slow. So on my laptop I forced
@@ -558,7 +564,7 @@ The following derivations would be built:
 <pjotrp> env GUIX_PACKAGE_PATH=../guix-bioinformatics/ ./pre-inst-env guix
          package -i genenetwork2 --dry-run
          --substitute-urls="http://mirror.hydra.gnu.org"
-<pjotrp> 
+<pjotrp>
 <pjotrp> the list looks the same to me  [07:40]
 <user01> me too
 <pjotrp> note that some packages will be built and some downloaded, right?
@@ -688,7 +694,7 @@ The following derivations would be built:
 <pjotrp> everything should be pre-built from guix.genenetwork.org
 <pjotrp> you are downloading?
 <user02> yes  [09:15]
-<pjotrp> cool. Maybe an idea to set up a server 
+<pjotrp> cool. Maybe an idea to set up a server
 <pjotrp> for your own use
 <user02> Stuck at downloading preprocesscore
 <pjotrp> should not  [09:24]
@@ -735,7 +741,7 @@ The following derivations would be built:
 <pjotrp> should be at
          /gnu/store/y1f3r2xs3fhyadd46nd2aqbr2p9qv2ra-r-biocpreprocesscore-1.32.0
                                                                         [09:33]
-<pjotrp> 
+<pjotrp>
 <user03> pjotrp: Possibly we should use the archive utility of Guix to do
         deployment to avoid such out-of-sync differences :)  [09:34]
 <pjotrp> maybe. I did not get archive to update profiles properly  [09:37]
@@ -802,7 +808,7 @@ The following derivations would be built:
 <pjotrp> but do not checkout that genetwork2_diet
 <pjotrp> we reverted to the main tree
 <pjotrp> clone git@github.com:genenetwork/genenetwork2.git  [09:53]
-<pjotrp> instead and checkout the staging branch 
+<pjotrp> instead and checkout the staging branch
 <pjotrp> that is effectively my branch  [09:54]
 <pjotrp> when that is done you should be able to fire up the webserver from
          there  [09:55]
@@ -825,7 +831,7 @@ The following derivations would be built:
 <user01> yep
 <pjotrp> that can also run on remote files over ssh
 <pjotrp> that's an alternative
-<pjotrp> kudos for using emacs :), wdyt user03 
+<pjotrp> kudos for using emacs :), wdyt user03
 <user02> 79 minutes to go downloading the db
 <pjotrp> user02: sorry about that  [09:59]
 <pjotrp> it is 2GB
@@ -850,7 +856,7 @@ The following derivations would be built:
          --substitute-urls="http://guix.genenetwork.org:8080"   [10:08]
 <pjotrp> elixir  1.2.3   out
          ../guix-bioinformatics/gn/packages/elixir.scm:31:2
-<pjotrp> 
+<pjotrp>
 <pjotrp> I am building it on guix.genenetwork.org right now  [10:09]
 <user01> nice  [10:10]
 #+end_src
-- 
cgit 1.4.1


From 0621666fba97b3646271bb037b6c43503e981abf Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 10 Sep 2016 10:03:44 +0200
Subject: Doc: Rpy2 note

---
 doc/README.org | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/doc/README.org b/doc/README.org
index aa05654f..2b27d562 100644
--- a/doc/README.org
+++ b/doc/README.org
@@ -6,7 +6,7 @@
    - [[#step-1-install-gnu-guix][Step 1: Install GNU Guix]]
    - [[#step-2-checkout-the-gn2-git-repositories][Step 2: Checkout the GN2 git repositories]]
    - [[#step-3-authorize-the-gn-guix-server][Step 3: Authorize the GN Guix server]]
-   - [[#step-4-install-and-run-gn2-][Step 4: Install and run GN2 ]]
+   - [[#step-4-install-and-run-gn2][Step 4: Install and run GN2]]
  - [[#run-mysql-server][Run MySQL server]]
  - [[#gn2-dependency-graph][GN2 Dependency Graph]]
  - [[#source-deployment][Source deployment]]
@@ -20,6 +20,7 @@
    - [[#importerror-no-module-named-jinja2][ImportError: No module named jinja2]]
    - [[#error-can-not-find-directory-homegn2_data][ERROR: can not find directory $HOME/gn2_data]]
    - [[#cant-run-a-module][Can't run a module]]
+   - [[#rpy2-error-show-not-found][Rpy2 error 'show' not found]]
  - [[#irc-session][IRC session]]
 
 * Introduction
@@ -453,6 +454,21 @@ In rare cases, development modules are not brought in with Guix
 because no source code is available. This can lead to missing modules
 on a running server. Please check with the authors when a module
 is missing.
+** Rpy2 error 'show' not found
+
+This error
+
+: __show = rpy2.rinterface.baseenv.get("show")
+: LookupError: 'show' not found
+
+means that R was updated in your path, and that Rpy2 needs to be
+recompiled against this R - don't you love informative messages?
+
+In our case it means that GN's PYTHONPATH is not in sync with
+R_LIBS_SITE. Please check your GNU Guix GN2 installation paths;
+you may need to reinstall. Note that this may be the point where
+you want to start using profiles (see profile section).
+
 * IRC session
 
 Here an IRC session where we installed GN2 from scratch using GNU Guix
-- 
cgit 1.4.1


From ec7d4e1198bc8d2f83ce99b41e2084fbce6a0be7 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 10 Sep 2016 11:43:21 +0200
Subject: Support for running maintenance scripts so they can pick up all
 webserver settings

Run with

   ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
---
 bin/genenetwork2                          | 27 ++++++++++++++++++++++++---
 wqflask/base/data_set.py                  |  2 +-
 wqflask/base/webqtlConfig.py              |  1 -
 wqflask/maintenance/gen_select_dataset.py | 26 +++++++++++++++++---------
 wqflask/utility/tools.py                  |  1 +
 5 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/bin/genenetwork2 b/bin/genenetwork2
index d3bf3299..3a8c3ff4 100755
--- a/bin/genenetwork2
+++ b/bin/genenetwork2
@@ -1,6 +1,14 @@
 #! /bin/bash
 #
-# This will run the GN2 server (with default settings if none supplied).
+# This will run the GN2 server (with default settings if none supplied). Pass in
+# your own settings file, e.g.
+#
+#   ./bin/genenetwork2 ~/my_settings.py
+#
+# To run a maintenance script with settings (instead of the webserver) add that with
+# a -c switch, e.g.
+#
+#   ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
 #
 # Environment settings can be used to preconfigure as well as a
 # settings.py file.
@@ -18,7 +26,12 @@ echo $GN2_BASE_PATH
 
 # Handle settings parameter
 settings=$1
-if [ -z $settings ]; then settings=$GN2_BASE_PATH/etc/default_settings.py ; fi
+if [ -z $settings ]; then
+    # get default
+    settings=$GN2_BASE_PATH/etc/default_settings.py
+else
+    shift
+fi
 if [ ! -e $settings ]; then
     echo "ERROR: can not locate settings file - pass it in the command line"
     exit 1
@@ -32,7 +45,15 @@ export PYTHONPATH=$GN2_BASE_PATH/wqflask:$PYTHONPATH
 if [ -z $TEMPDIR ]; then
     TEMPDIR="/tmp"
 fi
-                         
+
+# Now handle command parameter -c
+if [ $1 = '-c' ] ; then
+    echo PYTHONPATH=$PYTHONPATH
+    echo RUNNING COMMAND $2
+    /usr/bin/env python $2
+    exit 0
+fi
+
 echo "Starting the redis server:"
 echo -n "dir $TEMPDIR
 dbfilename gn2.rdb
diff --git a/wqflask/base/data_set.py b/wqflask/base/data_set.py
index 1b4e1195..6cd3c8e6 100644
--- a/wqflask/base/data_set.py
+++ b/wqflask/base/data_set.py
@@ -91,7 +91,7 @@ Publish or ProbeSet. E.g.
         if USE_GN_SERVER:
             data = menu_main()
         else:
-            file_name = "wqflask/static/new/javascript/dataset_menu_structure.json"
+            file_name = "wqflask/wqflask/static/new/javascript/dataset_menu_structure.json"
             with open(file_name, 'r') as fh:
                 data = json.load(fh)
 
diff --git a/wqflask/base/webqtlConfig.py b/wqflask/base/webqtlConfig.py
index f76d8140..8c67a6fd 100644
--- a/wqflask/base/webqtlConfig.py
+++ b/wqflask/base/webqtlConfig.py
@@ -80,4 +80,3 @@ PORTADDR = "http://50.16.251.170"
 INFOPAGEHREF = '/dbdoc/%s.html'
 CGIDIR = '/webqtl/' #XZ: The variable name 'CGIDIR' should be changed to 'PYTHONDIR'
 SCRIPTFILE = 'main.py'
-
diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index d39bf4a5..5c25c15b 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -1,7 +1,9 @@
 """Script that generates the data for the main dropdown menus on the home page
 
 Writes out data as /static/new/javascript/dataset_menu_structure.json
-It needs to be run manually when database has been changed.
+It needs to be run manually when database has been changed. Run it as
+
+    python gen_select_dataset.py
 
 """
 
@@ -37,9 +39,15 @@ from __future__ import print_function, division
 #print("cdict is:", cdict)
 
 import sys
-# import zach_settings # no hard code paths!
 
-# import MySQLdb
+# NEW: Note we prepend the current path - otherwise a guix instance of GN2 is used instead
+sys.path.insert(0,'./wqflask')
+# NEW: import app to avoid a circular dependency on utility.tools
+from wqflask import app
+
+from utility.tools import locate, locate_ignore_error, TEMPDIR, SQL_URI
+
+import MySQLdb
 
 # import simplejson as json
 import urlparse
@@ -55,14 +63,13 @@ from pprint import pformat as pf
 
 #conn = Engine.connect()
 
-print('ERROR: This conversion is now OBSOLETE as the menu gets built from the database in Javascript using GN_SERVER instead!')
-sys.exit()
+print('WARNING: This conversion is now OBSOLETE as the menu gets built from the database in Javascript using GN_SERVER instead!')
 
 
 def parse_db_uri(db_uri):
     """Converts a database URI to the db name, host name, user name, and password"""
 
-    parsed_uri = urlparse.urlparse(zach_settings.DB_URI)
+    parsed_uri = urlparse.urlparse(SQL_URI)
 
     db_conn_info = dict(
                         db = parsed_uri.path[1:],
@@ -70,6 +77,7 @@ def parse_db_uri(db_uri):
                         user = parsed_uri.username,
                         passwd = parsed_uri.password)
 
+    print(db_conn_info)
     return db_conn_info
 
 
@@ -258,7 +266,7 @@ def build_datasets(species, group, type_name):
 def main():
     """Generates and outputs (as json file) the data for the main dropdown menus on the home page"""
 
-    parse_db_uri(zach_settings.SQL_URI)
+    parse_db_uri(SQL_URI)
 
     species = get_species()
     groups = get_groups(species)
@@ -281,7 +289,7 @@ def main():
 
     #print("data:", data)
 
-    output_file = """../wqflask/static/new/javascript/dataset_menu_structure.json"""
+    output_file = """./wqflask/wqflask/static/new/javascript/dataset_menu_structure.json"""
 
     with open(output_file, 'w') as fh:
         json.dump(data, fh, indent="   ", sort_keys=True)
@@ -297,6 +305,6 @@ def _test_it():
     #print("build_datasets:", pf(datasets))
 
 if __name__ == '__main__':
-    Conn = MySQLdb.Connect(**parse_db_uri(zach_settings.SQL_URI))
+    Conn = MySQLdb.Connect(**parse_db_uri(SQL_URI))
     Cursor = Conn.cursor()
     main()
diff --git a/wqflask/utility/tools.py b/wqflask/utility/tools.py
index bb8241f5..2c8cc5c5 100644
--- a/wqflask/utility/tools.py
+++ b/wqflask/utility/tools.py
@@ -3,6 +3,7 @@
 
 import os
 import sys
+
 from wqflask import app
 
 # Use the standard logger here to avoid a circular dependency
-- 
cgit 1.4.1


From 5e106bad6e59d3a13be0b79715785948a3ff581a Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Sat, 10 Sep 2016 11:57:55 +0200
Subject: Comment

---
 wqflask/maintenance/gen_select_dataset.py              |  2 +-
 wqflask/wqflask/marker_regression/marker_regression.py | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/wqflask/maintenance/gen_select_dataset.py b/wqflask/maintenance/gen_select_dataset.py
index 5c25c15b..f2f0830f 100644
--- a/wqflask/maintenance/gen_select_dataset.py
+++ b/wqflask/maintenance/gen_select_dataset.py
@@ -3,7 +3,7 @@
 Writes out data as /static/new/javascript/dataset_menu_structure.json
 It needs to be run manually when database has been changed. Run it as
 
-    python gen_select_dataset.py
+  ./bin/genenetwork2 ~/my_settings.py -c ./wqflask/maintenance/gen_select_dataset.py
 
 """
 
diff --git a/wqflask/wqflask/marker_regression/marker_regression.py b/wqflask/wqflask/marker_regression/marker_regression.py
index f3b018b8..a1065adc 100644
--- a/wqflask/wqflask/marker_regression/marker_regression.py
+++ b/wqflask/wqflask/marker_regression/marker_regression.py
@@ -57,7 +57,7 @@ class MarkerRegression(object):
 
         all_samples_ordered = self.dataset.group.all_samples_ordered()
         primary_sample_names = list(all_samples_ordered)
-        
+
         for sample in self.dataset.group.samplelist:
             in_trait_data = False
             for item in self.this_trait.data:
@@ -241,9 +241,9 @@ class MarkerRegression(object):
                         highest_chr = marker['chr']
                     if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                         self.qtl_results.append(marker)
-       
+
             self.trimmed_markers = trim_markers_for_table(results)
-			
+
             self.json_data['chr'] = []
             self.json_data['pos'] = []
             self.json_data['lod.hk'] = []
@@ -560,7 +560,7 @@ class MarkerRegression(object):
 
 
     def gen_pheno_txt_file_plink(self, pheno_filename = ''):
-        ped_sample_list = self.get_samples_from_ped_file()	
+        ped_sample_list = self.get_samples_from_ped_file()
         output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
         header = 'FID\tIID\t%s\n' % self.this_trait.name
         output_file.write(header)
@@ -595,7 +595,7 @@ class MarkerRegression(object):
         output_file.close()
 
     def gen_pheno_txt_file_rqtl(self, pheno_filename = ''):
-        ped_sample_list = self.get_samples_from_ped_file()	
+        ped_sample_list = self.get_samples_from_ped_file()
         output_file = open("%s%s.txt" % (TMPDIR, pheno_filename), "wb")
         header = 'FID\tIID\t%s\n' % self.this_trait.name
         output_file.write(header)
@@ -653,7 +653,7 @@ class MarkerRegression(object):
             genotype = genotype.addinterval()
 
         samples, values, variances, sample_aliases = self.this_trait.export_informative()
-        
+
         trimmed_samples = []
         trimmed_values = []
         for i in range(0, len(samples)):
@@ -818,7 +818,7 @@ class MarkerRegression(object):
         """Runs permutations and gets significant and suggestive LOD scores"""
 
         top_lod_scores = []
-	
+
         #print("self.num_perm:", self.num_perm)
 
         for permutation in range(self.num_perm):
@@ -1084,7 +1084,7 @@ def create_snp_iterator_file(group):
 
 def trim_markers_for_table(markers):
     num_markers = len(markers)
-	
+
     if 'lod_score' in markers[0].keys():
         sorted_markers = sorted(markers, key=lambda k: k['lod_score'], reverse=True)
     else:
-- 
cgit 1.4.1