From 4ccb2411b78bcc5112316cef7192033d698b8b03 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 May 2016 16:51:45 +0000 Subject: Added JOSS paper --- doc/joss/2016/paper.bib | 36 ++++++++++++++++++++ doc/joss/2016/paper.json | 23 +++++++++++++ doc/joss/2016/paper.md | 84 +++++++++++++++++++++++++++++++++++++++++++++++ doc/joss/2016/qtl.png | Bin 0 -> 146924 bytes doc/joss/2016/qtl2.png | Bin 0 -> 375505 bytes 5 files changed, 143 insertions(+) create mode 100644 doc/joss/2016/paper.bib create mode 100644 doc/joss/2016/paper.json create mode 100644 doc/joss/2016/paper.md create mode 100644 doc/joss/2016/qtl.png create mode 100644 doc/joss/2016/qtl2.png (limited to 'doc/joss/2016') diff --git a/doc/joss/2016/paper.bib b/doc/joss/2016/paper.bib new file mode 100644 index 00000000..73a88227 --- /dev/null +++ b/doc/joss/2016/paper.bib @@ -0,0 +1,36 @@ +@article{WGCNA:2008, + author = {Langfelder, P. and Horvath, S.}, + title = {{WGCNA: an R package for weighted correlation network analysis}}, + journal = {BMC Bioinformatics}, + year = {2008}, + volume = {9}, + pages = {559}, + doi = {10.1186/1471-2105-9-559}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/19114008}, + abstract = {BACKGROUND: Correlation networks are increasingly being used in bioinformatics applications. For example, weighted gene co-expression network analysis is a systems biology method for describing the correlation patterns among genes across microarray samples. Weighted correlation network analysis (WGCNA) can be used for finding clusters (modules) of highly correlated genes, for summarizing such clusters using the module eigengene or an intramodular hub gene, for relating modules to one another and to external sample traits (using eigengene network methodology), and for calculating module membership measures. Correlation networks facilitate network based gene screening methods that can be used to identify candidate biomarkers or therapeutic targets. 
These methods have been successfully applied in various biological contexts, e.g. cancer, mouse genetics, yeast genetics, and analysis of brain imaging data. While parts of the correlation network methodology have been described in separate publications, there is a need to provide a user-friendly, comprehensive, and consistent software implementation and an accompanying tutorial. RESULTS: The WGCNA R software package is a comprehensive collection of R functions for performing various aspects of weighted correlation network analysis. The package includes functions for network construction, module detection, gene selection, calculations of topological properties, data simulation, visualization, and interfacing with external software. Along with the R package we also present R software tutorials. While the methods development was motivated by gene expression data, the underlying data mining approach can be applied to a variety of different settings. CONCLUSION: The WGCNA package provides R functions for weighted correlation network analysis, e.g. co-expression network analysis of gene expression data. The R package along with its source code and additional material are freely available at http://www.genetics.ucla.edu/labs/horvath/CoexpressionNetwork/Rpackages/WGCNA.} +} + +@article{Wang:2016, + author = {Wang, X. and Pandey, A. K. and Mulligan, M. K. and Williams, E. G. and Mozhui, K. and Li, Z. and Jovaisaite, V. and Quarles, L. D. and Xiao, Z. and Huang, J. and Capra, J. A. and Chen, Z. and Taylor, W. L. and Bastarache, L. and Niu, X. and Pollard, K. S. and Ciobanu, D. C. and Reznik, A. O. and Tishkov, A. V. and Zhulin, I. B. and Peng, J. and Nelson, S. F. and Denny, J. C. and Auwerx, J. and Lu, L. and Williams, R. 
W.}, + title = {{Joint mouse-human phenome-wide association to test gene function and disease risk}}, + journal = {Nat Commun}, + year = {2016}, + volume = {7}, + pages = {10464}, + doi = {10.1038/ncomms10464}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/26833085}, + abstract = {Phenome-wide association is a novel reverse genetic strategy to analyze genome-to-phenome relations in human clinical cohorts. Here we test this approach using a large murine population segregating for approximately 5 million sequence variants, and we compare our results to those extracted from a matched analysis of gene variants in a large human cohort. For the mouse cohort, we amassed a deep and broad open-access phenome consisting of approximately 4,500 metabolic, physiological, pharmacological and behavioural traits, and more than 90 independent expression quantitative trait locus (QTL), transcriptome, proteome, metagenome and metabolome data sets--by far the largest coherent phenome for any experimental cohort (www.genenetwork.org). We tested downstream effects of subsets of variants and discovered several novel associations, including a missense mutation in fumarate hydratase that controls variation in the mitochondrial unfolded protein response in both mouse and Caenorhabditis elegans, and missense mutations in Col6a5 that underlies variation in bone mineral density in both mouse and human.} +} + +@article{Lippert:2011, + author = {Lippert, C. and Listgarten, J. and Liu, Y. and Kadie, C. M. and Davidson, R. I. 
and Heckerman, D.}, + title = {{FaST linear mixed models for genome-wide association studies}}, + journal = {Nat Methods}, + year = {2011}, + volume = {8}, + number = {10}, + pages = {833--835}, + doi = {10.1038/nmeth.1681}, + url = {http://www.ncbi.nlm.nih.gov/pubmed/21892150}, + abstract = {We describe factored spectrally transformed linear mixed models (FaST-LMM), an algorithm for genome-wide association studies (GWAS) that scales linearly with cohort size in both run time and memory use. On Wellcome Trust data for 15,000 individuals, FaST-LMM ran an order of magnitude faster than current efficient algorithms. Our algorithm can analyze data for 120,000 individuals in just a few hours, whereas current algorithms fail on data for even 20,000 individuals (http://mscompbio.codeplex.com/).} +} diff --git a/doc/joss/2016/paper.json b/doc/joss/2016/paper.json new file mode 100644 index 00000000..c3c02156 --- /dev/null +++ b/doc/joss/2016/paper.json @@ -0,0 +1,23 @@ +{ + "@context": "https://raw.githubusercontent.com/mbjones/codemeta/master/codemeta.jsonld", + "@type": "Code", + "author": [ + { + "@id": "0000-0002-9623-3401", + "@type": "Person", + "email": "jakevdp@uw.edu", + "name": "Jake VanderPlas", + "affiliation": "University of Washington eScience Institute" + } + ], + "identifier": "https://zenodo.org/record/50995#.Vyp9DBUrJBw", + "codeRepository": "http://github.com/jakevdp/mst_clustering", + "datePublished": "2016-05-04", + "dateModified": "2016-05-04", + "dateCreated": "2016-05-04", + "description": "Clustering via Euclidean Minimum Spanning Trees", + "keywords": "machine learning", + "license": "BSD", + "title": "mst_clustering", + "version": "v1.0" +} diff --git a/doc/joss/2016/paper.md b/doc/joss/2016/paper.md new file mode 100644 index 00000000..81ec2b72 --- /dev/null +++ b/doc/joss/2016/paper.md @@ -0,0 +1,84 @@ +--- +title: 'GeneNetwork: framework for web-based genetics' +tags: + - bioinformatics + - genetics + - genomics +authors: + - name: Zachary 
Sloan + orcid: 0000-0002-8099-1363 + affiliation: University of Tennessee Health Science Center, USA + - name: Danny Arends + orcid: 0000-0001-8738-0162 + affiliation: Humboldt University, Berlin, Germany + - name: Karl W. Broman + orcid: 0000-0002-4914-6671 + affiliation: University of Wisconsin, USA + - name: Arthur Centeno + orcid: 0000-0003-3142-2081 + affiliation: University of Tennessee Health Science Center, USA + - name: Nick Furlotte + orcid: ? + - name: Harm Nijveen + orcid: 0000-0002-9167-4945 + affiliation: Wageningen University, The Netherlands + - name: Lei Yan + orcid: 0000-0001-5259-3379 + affiliation: University of Tennessee Health Science Center, USA + - name: Xiang Zhou + orcid: 0000-0002-4331-7599 + affiliation: University of Michigan + - name: Robert W. Williams + orcid: 0000-0001-8924-4447 + affiliation: University of Tennessee Health Science Center, USA + - name: Pjotr Prins + orcid: 0000-0002-8021-9162 + affiliation: University Medical Center Utrecht, The Netherlands + affiliation: University of Tennessee Health Science Center, USA +date: 29 May 2016 +bibliography: paper.bib +--- + +# Summary + +GeneNetwork (GN) is a free and open source (FOSS) framework for web +based genetics that can be deployed anywhere. GN allows biologists to +upload experimental data and map phenotypes interactively against +genotypes using tools, such as R/QTL [@mqm paper] mapping, interval +mapping for model organisms and pylmm, an implementation of FaST-LMM +[@Lippert:2011] which is suitable for human populations and outbred +crosses, such as the mouse diversity outcross. Interactive D3 graphics +are included from R/qtlcharts and presentation-ready figures can be +generated. Recently we have added functionality for phenotype +correlation [@Wang:2016] and network analysis [@WGCNA:2008]. 
+ +-![Mouse LMM mapping example](qtl2.png) + +GN is written in python and javascript and contains a rich set of +tools and libraries that can be written in any computer language. A +full list of included software can be found in +[guix-bioinformatics](https://github.com/genenetwork/guix-bioinformatics/blob/master/gn/packages/genenetwork.scm). To +make it easy to install GN locally in a byte reproducible way, +including all dependencies and a 2GB MySQL test database (the full +database is 160GB and growing), GN is packaged with +[GNU Guix](https://www.gnu.org/software/guix/), as described +[here](https://github.com/genenetwork/genenetwork2/blob/staging/doc/README.org). +GNU Guix deployment makes it feasible to deploy and rebrand GN +anywhere. + +# Future work + +More mapping tools will be added, including support for Genome-wide +Efficient Mixed Model Association (GEMMA). The +[Biodalliance genome browser](http://www.biodalliance.org/) is being +added as a Google Summer of Code project with special tracks related +to QTL mapping and network analysis. Faster LMM solutions are being +worked on, including GPU support. + +A REST interface is being added so that data can be uploaded to a +server, analysis run remotely on high performance hardware, and +results downloaded and used for further analysis. This feature will +allow biologist-programmers to use R and python on their computer and +execute computations on GN enabled servers. + +# References diff --git a/doc/joss/2016/qtl.png b/doc/joss/2016/qtl.png new file mode 100644 index 00000000..995a2739 Binary files /dev/null and b/doc/joss/2016/qtl.png differ diff --git a/doc/joss/2016/qtl2.png b/doc/joss/2016/qtl2.png new file mode 100644 index 00000000..e0b684ef Binary files /dev/null and b/doc/joss/2016/qtl2.png differ -- cgit v1.2.3