diff options
Diffstat (limited to 'doc/Architecture.org')
-rw-r--r-- | doc/Architecture.org | 135 |
1 files changed, 125 insertions, 10 deletions
diff --git a/doc/Architecture.org b/doc/Architecture.org index ed19889c..04e05e40 100644 --- a/doc/Architecture.org +++ b/doc/Architecture.org @@ -1,19 +1,20 @@ -* GeneNetwork Architecture - #+TITLE: Installing GeneNetwork services * Table of Contents :TOC: - - [[#genenetwork-architecture][GeneNetwork Architecture]] - - [[#introduction][Introduction]] - - [[#webserver][Webserver]] - - [[#gnserver-rest][GnServer (REST)]] + - [[#introduction][Introduction]] + - [[#webserver][Webserver]] + - [[#gnserver-rest][GnServer (REST)]] + - [[#gnexec][GnExec]] + - [[#database][Database]] + - [[#phenotypes][Phenotypes]] + - [[#genotypes][Genotypes]] -** Introduction +* Introduction This document describes the architecture of GN2. Because GN2 is evolving, only a high-level overview is given here. -** Webserver +* Webserver The main [[https://github.com/genenetwork/genenetwork2][GN2 webserver]] is built on [[http://flask.pocoo.org/][Python flask]] and this GN2 source code can be found on [[https://github.com/genenetwork/genenetwork2/tree/master/wqflask/wqflask][github]] in the wqflask directory. The routing @@ -44,7 +45,7 @@ Consortium M430v2 (Jun06) PDNN to find all records with MEAN between 15 and 16 and with LRS between 23 and 46.'. Then the results are added to a table which is displayed using a JS [[https://datatables.net/][DataTable container]]. -** GnServer (REST) +* GnServer (REST) The [[https://github.com/genenetwork/gn_server][GnServer REST API]] is built on high performance [[http://elixir-lang.org/][Elixir]] with [[https://github.com/falood/maru][Maru]]. Mainly the GnServer serves JSON requests, for example to fetch data @@ -52,4 +53,118 @@ from the database. To get the menu data in YAML you can do something like : curl localhost:8880/int/menu/main.json|ruby extra/json2yaml.rb -(json2yaml.rb is in the gn_server repo). +(json2yaml.rb is in the gn_server repo). For the current API definition +see [[https://github.com/genenetwork/gn_server/doc/API.md][GnServer REST API]] documentation. + +* GnExec + +GnExec, also written in Elixir, executes commands using a separate +daemon. + +* Database +** Phenotypes + +Phenotypes are stored in the SQL database. For what happens at the +database level see [[database.org]]. A test database can be downloaded - +see the installation [[./README.org][instructions]]. + +** Genotypes + +Genotypes are stored in genotype files. These are part of the GNU Guix +distribution, see the installation [[./README.org][instructions]]. Genotype files are +currently in GN1 format, and will be aligned with the [[http://kbroman.org/qtl2/pages/sampledata.html][R/qtl2 formats]]. + +GN1-style (still default GN2) for the stored file BXD.geno: + +#+begin_src js +@name:BXD +@type:riset +@mat:B +@pat:D +@het:H +@unk:U +Chr Locus cM Mb BXD1 BXD2 BXD5 BXD6 BXD8 BXD9 BXD11 BXD12 BXD13 BXD14 BX +D15 BXD16 BXD18 BXD19 BXD20 BXD21 BXD22 BXD23 BXD24a BXD24 BXD25 BXD27 BXD28 BX +D29 BXD30 BXD31 BXD32 BXD33 BXD34 BXD35 BXD36 BXD37 BXD38 BXD39 BXD40 BXD41 BXD4 +2 BXD43 BXD44 BXD45 BXD48 BXD49 BXD50 BXD51 BXD52 BXD53 BXD54 BXD55 BXD56 BXD59 +BXD60 BXD61 BXD62 BXD63 BXD64 BXD65 BXD66 BXD67 BXD68 BXD69 BXD70 BXD71 BXD72 BX +D73 BXD74 BXD75 BXD76 BXD77 BXD78 BXD79 BXD80 BXD81 BXD83 BXD84 BXD85 BXD86 BXD8 +7 BXD88 BXD89 BXD90 BXD91 BXD92 BXD93 BXD94 BXD95 BXD96 BXD97 BXD98 BXD99 BXD100 + BXD101 BXD102 BXD103 +1 rs6269442 0.0 3.482275 B B D D D B B D B B D D B D D D D B B B D B D D B B B +B B B B B B D B D B B D B B H H B D B B H H B B D D D D D B B H B B B B D B D B +D D D D D H B D D B D B B D D B D D B B B B B B B D +1 rs6365999 0.0 4.811062 B B D D D B B D B B D D B D D D D B B B D B D D B B B +B B B B B B D B D B B D B B H H B D B B H H B B D D D D D B B H B B B B D B D B +D D D D D H B D D B D B B D D B D D B B B B B B U D +... +#+end_src + +and, for example, in the method run_rqtl_geno this file gets +loaded. For GnServer, however, we only want to deal with standardized +R/qtl formatted data, so with gn_extra we convert the original format +into R/qtl format with geno2rqtl with one adaptation: the geno table +is transposed so now becomes + +#+begin_src js +marker,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103 +1,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,B,D +2,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D +3,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,D,D,B,B,H,H,B,B,B,B,H,H,B,B,D,D,D,D,B,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D +... +#+end_src js + +i.e. individuals are columns and markers are rows. Alternatively it could look like + +#+begin_src js +marker,BXD1,BXD2,BXD5,BXD6,BXD8,BXD9,BXD11,BXD12,BXD13,BXD14,BXD15,BXD16,BXD18,BXD19,BXD20,BXD21,BXD22,BXD23,BXD24a,BXD24,BXD25,BXD27,BXD28,BXD29,BXD30,BXD31,BXD32,BXD33,BXD34,BXD35,BXD36,BXD37,BXD38,BXD39,BXD40,BXD41,BXD42,BXD43,BXD44,BXD45,BXD48,BXD49,BXD50,BXD51,BXD52,BXD53,BXD54,BXD55,BXD56,BXD59,BXD60,BXD61,BXD62,BXD63,BXD64,BXD65,BXD66,BXD67,BXD68,BXD69,BXD70,BXD71,BXD72,BXD73,BXD74,BXD75,BXD76,BXD77,BXD78,BXD79,BXD80,BXD81,BXD83,BXD84,BXD85,BXD86,BXD87,BXD88,BXD89,BXD90,BXD91,BXD92,BXD93,BXD94,BXD95,BXD96,BXD97,BXD98,BXD99,BXD100,BXD101,BXD102,BXD103 +rs6269442,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,B,D +rs6365999,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,B,D,B,B,H,H,B,D,B,B,H,H,B,B,D,D,D,D,D,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D +rs6376963,B,B,D,D,D,B,B,D,B,B,D,D,B,D,D,D,D,B,B,B,D,B,D,D,B,B,B,B,B,B,B,B,B,D,B,D,B,D,D,B,B,H,H,B,B,B,B,H,H,B,B,D,D,D,D,B,B,B,H,B,B,B,B,D,B,D,B,D,D,D,D,D,H,B,D,D,B,D,B,B,D,D,B,D,D,B,B,B,B,B,B,U,D +#+end_src js + +This is also the format provided by R/qtl in +https://github.com/rqtl/qtl2data/tree/master/DO_Recla which we will +use as the base line for the REST server. In the meta json file the +genotype data is tagged as transposed: + +#+begin_src js +{ +"description": "DO data from Recla et al. (2014) Mamm Genome 25:211-222", +"crosstype": "do", +"geno": "recla_geno.csv", +"geno_transposed": true, +"founder_geno": "recla_foundergeno.csv", +"founder_geno_transposed": true, +"genotypes": { + "1": "1", + "2": "2", + "3": "3" +}, +"pheno": "recla_pheno.csv", +"pheno_transposed": false, +"covar": "recla_covar.csv", +"sex": { + "covar": "Sex", + "female": "female", + "male": "male" +}, +"x_chr": "X", +"cross_info": { + "covar": "ngen" +}, +"gmap": "recla_gmap.csv", +"pmap": "recla_pmap.csv", +"alleles": ["A", "B", "C", "D", "E", "F", "G", "H"] +} +#+end_src + +Meanwhile the gmap file looks like + +#+begin_src js +marker,chr,pos,Mb +rs6269442,1,0.0,3.482275 +rs6365999,1,0.0,4.811062 +rs6376963,1,0.895,5.008089 +rs3677817,1,1.185,5.176058 +#+end_src |