diff options
Diffstat (limited to 'doc/joss/2016')
-rw-r--r-- | doc/joss/2016/paper.bib | 50 | ||||
-rw-r--r-- | doc/joss/2016/paper.md | 87 | ||||
-rw-r--r-- | doc/joss/2016/qtl.png | bin | 0 -> 146924 bytes | |||
-rw-r--r-- | doc/joss/2016/qtl2.png | bin | 0 -> 375505 bytes |
4 files changed, 137 insertions, 0 deletions
diff --git a/doc/joss/2016/paper.bib b/doc/joss/2016/paper.bib new file mode 100644 index 00000000..34c0fd05 --- /dev/null +++ b/doc/joss/2016/paper.bib @@ -0,0 +1,50 @@ +@article{WGCNA:2008, + author = {Langfelder, P. and Horvath, S.}, + title = {{WGCNA: an R package for weighted correlation network analysis}}, + journal = {BMC Bioinformatics}, + year = {2008}, + volume = {9}, + pages = {559}, + doi = {10.1186/1471-2105-9-559}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/19114008}, + abstract = {BACKGROUND: Correlation networks are increasingly being used in bioinformatics applications. For example, weighted gene co-expression network analysis is a systems biology method for describing the correlation patterns among genes across microarray samples. Weighted correlation network analysis (WGCNA) can be used for finding clusters (modules) of highly correlated genes, for summarizing such clusters using the module eigengene or an intramodular hub gene, for relating modules to one another and to external sample traits (using eigengene network methodology), and for calculating module membership measures. Correlation networks facilitate network based gene screening methods that can be used to identify candidate biomarkers or therapeutic targets. These methods have been successfully applied in various biological contexts, e.g. cancer, mouse genetics, yeast genetics, and analysis of brain imaging data. While parts of the correlation network methodology have been described in separate publications, there is a need to provide a user-friendly, comprehensive, and consistent software implementation and an accompanying tutorial. RESULTS: The WGCNA R software package is a comprehensive collection of R functions for performing various aspects of weighted correlation network analysis. 
The package includes functions for network construction, module detection, gene selection, calculations of topological properties, data simulation, visualization, and interfacing with external software. Along with the R package we also present R software tutorials. While the methods development was motivated by gene expression data, the underlying data mining approach can be applied to a variety of different settings. CONCLUSION: The WGCNA package provides R functions for weighted correlation network analysis, e.g. co-expression network analysis of gene expression data. The R package along with its source code and additional material are freely available at http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA.} +} + +@article{Wang:2016, + author = {Wang, X. and Pandey, A. K. and Mulligan, M. K. and Williams, E. G. and Mozhui, K. and Li, Z. and Jovaisaite, V. and Quarles, L. D. and Xiao, Z. and Huang, J. and Capra, J. A. and Chen, Z. and Taylor, W. L. and Bastarache, L. and Niu, X. and Pollard, K. S. and Ciobanu, D. C. and Reznik, A. O. and Tishkov, A. V. and Zhulin, I. B. and Peng, J. and Nelson, S. F. and Denny, J. C. and Auwerx, J. and Lu, L. and Williams, R. W.}, + title = {{Joint mouse-human phenome-wide association to test gene function and disease risk}}, + journal = {Nat Commun}, + year = {2016}, + volume = {7}, + pages = {10464}, + doi = {10.1038/ncomms10464}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/26833085}, + abstract = {Phenome-wide association is a novel reverse genetic strategy to analyze genome-to-phenome relations in human clinical cohorts. Here we test this approach using a large murine population segregating for approximately 5 million sequence variants, and we compare our results to those extracted from a matched analysis of gene variants in a large human cohort. 
For the mouse cohort, we amassed a deep and broad open-access phenome consisting of approximately 4,500 metabolic, physiological, pharmacological and behavioural traits, and more than 90 independent expression quantitative trait locus (QTL), transcriptome, proteome, metagenome and metabolome data sets--by far the largest coherent phenome for any experimental cohort (www.genenetwork.org). We tested downstream effects of subsets of variants and discovered several novel associations, including a missense mutation in fumarate hydratase that controls variation in the mitochondrial unfolded protein response in both mouse and Caenorhabditis elegans, and missense mutations in Col6a5 that underlies variation in bone mineral density in both mouse and human.} +} + +@article{Lippert:2011, + author = {Lippert, C. and Listgarten, J. and Liu, Y. and Kadie, C. M. and Davidson, R. I. and Heckerman, D.}, + title = {{FaST linear mixed models for genome-wide association studies}}, + journal = {Nat Methods}, + year = {2011}, + volume = {8}, + number = {10}, + pages = {833--835}, + doi = {10.1038/nmeth.1681}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/21892150}, + abstract = {We describe factored spectrally transformed linear mixed models (FaST-LMM), an algorithm for genome-wide association studies (GWAS) that scales linearly with cohort size in both run time and memory use. On Wellcome Trust data for 15,000 individuals, FaST-LMM ran an order of magnitude faster than current efficient algorithms. Our algorithm can analyze data for 120,000 individuals in just a few hours, whereas current algorithms fail on data for even 20,000 individuals (http://mscompbio.codeplex.com/).} +} + +@article{Arends:2010, + author = {Arends, D. and Prins, P. and Jansen, R. C. and Broman, K. 
W.}, + title = {{R/qtl: high-throughput multiple QTL mapping}}, + journal = {Bioinformatics}, + year = {2010}, + volume = {26}, + number = {23}, + pages = {2990--2992}, + doi = {10.1093/bioinformatics/btq565}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/20966004}, + abstract = {MOTIVATION: R/qtl is free and powerful software for mapping and exploring quantitative trait loci (QTL). R/qtl provides a fully comprehensive range of methods for a wide range of experimental cross types. We recently added multiple QTL mapping (MQM) to R/qtl. MQM adds higher statistical power to detect and disentangle the effects of multiple linked and unlinked QTL compared with many other methods. MQM for R/qtl adds many new features including improved handling of missing data, analysis of 10,000 s of molecular traits, permutation for determining significance thresholds for QTL and QTL hot spots, and visualizations for cis-trans and QTL interaction effects. MQM for R/qtl is the first free and open source implementation of MQM that is multi-platform, scalable and suitable for automated procedures and large genetical genomics datasets. AVAILABILITY: R/qtl is free and open source multi-platform software for the statistical language R, and is made available under the GPLv3 license. R/qtl can be installed from http://www.rqtl.org/. R/qtl queries should be directed at the mailing list, see http://www.rqtl.org/list/. CONTACT: kbroman@biostat.wisc.edu.}, + +} diff --git a/doc/joss/2016/paper.md b/doc/joss/2016/paper.md new file mode 100644 index 00000000..12b3b5d0 --- /dev/null +++ b/doc/joss/2016/paper.md @@ -0,0 +1,87 @@ +--- +title: 'GeneNetwork: framework for web-based genetics' +tags: + - bioinformatics + - genetics + - genomics +authors: + - name: Zachary Sloan + orcid: 0000-0002-8099-1363 + affiliation: University of Tennessee Health Science Center, USA + - name: Danny Arends + orcid: 0000-0001-8738-0162 + affiliation: Humboldt University, Berlin, Germany + - name: Karl W. 
Broman + orcid: 0000-0002-4914-6671 + affiliation: University of Wisconsin, USA + - name: Arthur Centeno + orcid: 0000-0003-3142-2081 + affiliation: University of Tennessee Health Science Center, USA + - name: Nicholas Furlotte + orcid: 0000-0002-9096-6276 + - name: Harm Nijveen + orcid: 0000-0002-9167-4945 + affiliation: Wageningen University, The Netherlands + - name: Lei Yan + orcid: 0000-0001-5259-3379 + affiliation: University of Tennessee Health Science Center, USA + - name: Xiang Zhou + orcid: 0000-0002-4331-7599 + affiliation: University of Michigan + - name: Robert W. Williams + orcid: 0000-0001-8924-4447 + affiliation: University of Tennessee Health Science Center, USA + - name: Pjotr Prins + orcid: 0000-0002-8021-9162 + affiliation: University Medical Center Utrecht, The Netherlands, University of Tennessee Health Science Center, USA +date: 29 May 2016 +bibliography: paper.bib +--- + +# Summary + +GeneNetwork (GN) is a free and open source (FOSS) framework for +web-based genetics that can be deployed anywhere. GN allows biologists +to upload high-throughput experimental data, such as expression data +from microarrays and RNA-seq, and also 'classic' phenotypes, such as +disease phenotypes. These phenotypes can be mapped interactively +against genotypes using embedded tools, such as R/QTL [@Arends:2010] +mapping, interval mapping for model organisms and pylmm, an +implementation of FaST-LMM [@Lippert:2011] which is more suitable for +human populations and outbred crosses, such as the mouse diversity +outcross. Interactive D3 graphics are included from R/qtlcharts and +presentation-ready figures can be generated. Recently we have added +functionality for phenotype correlation [@Wang:2016] and network +analysis [@WGCNA:2008]. + +-![Mouse LMM mapping example](qtl2.png) + +GN is written in Python and JavaScript and contains a rich set of +tools and libraries that can be written in any computer language. 
A +full list of included software can be found in the package named +`genenetwork2` and defined in +[guix-bioinformatics](https://github.com/genenetwork/guix-bioinformatics/blob/master/gn/packages/genenetwork.scm). To +make it easy to install GN locally in a byte-reproducible way, +including all dependencies and a 2GB MySQL test database (the full +database is 160GB and growing), GN is packaged with +[GNU Guix](https://www.gnu.org/software/guix/), as described +[here](https://github.com/genenetwork/genenetwork2/blob/master/doc/README.org). +GNU Guix deployment makes it feasible to deploy and rebrand GN +anywhere. + +# Future work + +More mapping tools will be added, including support for Genome-wide +Efficient Mixed Model Association (GEMMA). The +[Biodalliance genome browser](http://www.biodalliance.org/) is being +added as a Google Summer of Code project with special tracks related +to QTL mapping and network analysis. Faster LMM solutions are being +worked on, including GPU support. + +A REST interface is being added so that data can be uploaded to a +server, analysis run remotely on high-performance hardware, and +results downloaded and used for further analysis. This feature will +allow biologist-programmers to use R and Python on their computer and +execute computations on GN-enabled servers. + +# References diff --git a/doc/joss/2016/qtl.png b/doc/joss/2016/qtl.png Binary files differnew file mode 100644 index 00000000..995a2739 --- /dev/null +++ b/doc/joss/2016/qtl.png diff --git a/doc/joss/2016/qtl2.png b/doc/joss/2016/qtl2.png Binary files differnew file mode 100644 index 00000000..e0b684ef --- /dev/null +++ b/doc/joss/2016/qtl2.png |