From a958718cdf8b2dc3a991c310b7c20a930c87a1d6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 13:04:05 +0100 Subject: RELEASE NOTES --- README.md | 2 +- RELEASE-NOTES.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d6f7275..97ef042 100644 --- a/README.md +++ b/README.md @@ -253,7 +253,7 @@ program and the contents of the .log.txt file in the output directory. ### Check list: -1. [X] I have found and issue with GEMMA +1. [X] I have found an issue with GEMMA 2. [ ] I have searched for it on the [issue tracker](https://github.com/genetics-statistics/GEMMA/issues?q=is%3Aissue) (incl. closed issues) 3. [ ] I have searched for it on the [mailing list](https://groups.google.com/forum/#!forum/gemma-discussion) 4. [ ] I have tried the latest [release](https://github.com/genetics-statistics/GEMMA/releases) of GEMMA diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 4345c27..0c44887 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -6,6 +6,8 @@ and ## ChangeLog v0.98.3 (2020/?) 
+* Fix Travis build with gcc 5.5 (OpenBLAS related round-offs) +* GEMMA installs on FreeBSD (thanks @outpaddling) ## ChangeLog v0.98.2 (2019/05/28) -- cgit v1.2.3 From 49f92eb9007456bc35822efc55f28e541d2e2df1 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 13:22:00 +0100 Subject: Trying to fix OSX on Travis --- .travis.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.travis.yml b/.travis.yml index 5bc64eb..92d709c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,9 +10,21 @@ matrix: packages: - libgsl-dev - libopenblas-dev + - os: linux + compiler: clang + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - libgsl-dev + - libopenblas-dev + - os: osx + compiler: clang script: - echo $MATRIX_EVAL - eval "${MATRIX_EVAL}" + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew cask uninstall oclint && brew update && brew install gsl openblas zlib eigen lapack ; fi - $CXX --version - make -j 4 OPENBLAS_LEGACY=1 - make OPENBLAS_LEGACY=1 fast-check -- cgit v1.2.3 From 022c1f9016f6a6eff030dcf44bbe2a555d33a481 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 13:34:22 +0100 Subject: Trying to fix OSX on Travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 92d709c..9be0612 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ matrix: script: - echo $MATRIX_EVAL - eval "${MATRIX_EVAL}" - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew cask uninstall oclint && brew update && brew install gsl openblas zlib eigen lapack ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install gsl openblas zlib ; fi - $CXX --version - make -j 4 OPENBLAS_LEGACY=1 - make OPENBLAS_LEGACY=1 fast-check -- cgit v1.2.3 From f0666dd5274640301faacb9e2206dd540c66ea9a Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 13:42:46 +0100 Subject: Trying to fix OSX on Travis --- .travis.yml | 2 +- RELEASE-NOTES.md | 1 + 2 files changed, 
2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9be0612..d9baba3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,7 @@ matrix: script: - echo $MATRIX_EVAL - eval "${MATRIX_EVAL}" - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update && brew install gsl openblas zlib ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install gsl openblas zlib ; fi - $CXX --version - make -j 4 OPENBLAS_LEGACY=1 - make OPENBLAS_LEGACY=1 fast-check diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 0c44887..7d8b633 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -7,6 +7,7 @@ and ## ChangeLog v0.98.3 (2020/?) * Fix Travis build with gcc 5.5 (OpenBLAS related round-offs) +* Fix Travis build on OSX (brew related) * GEMMA installs on FreeBSD (thanks @outpaddling) ## ChangeLog v0.98.2 (2019/05/28) -- cgit v1.2.3 From 4bfa67ebad843aa96d2bf44a6023788a4b43d39f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 13:53:55 +0100 Subject: Trying to fix OSX on Travis --- .travis.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index d9baba3..a8362c0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,7 +24,9 @@ matrix: script: - echo $MATRIX_EVAL - eval "${MATRIX_EVAL}" - - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install gsl openblas zlib ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install gsl ; fi - $CXX --version - - make -j 4 OPENBLAS_LEGACY=1 + - OPENBLAS="$(brew --prefix openblas)" + - echo $OPENBLAS + - make -j 4 CXX=$CXX WITH_OPENBLAS=1 OPENBLAS_LEGACY=1 - make OPENBLAS_LEGACY=1 fast-check -- cgit v1.2.3 From d1bdb764dea3d021eb5a22d313872abed1fee010 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 14:19:10 +0100 Subject: Trying to fix OSX on Travis --- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 246cf6f..92e55b9 100644 --- a/Makefile +++ b/Makefile @@ -111,7 +111,7 @@ else 
endif ifeq ($(CPP), clang++) - GCC_FLAGS=-std=c++11 -isystem$(OPENBLAS_INCLUDE_PATH) + GCC_FLAGS=-std=c++11 -isystem$(OPENBLAS_INCLUDE_PATH) ifdef GUIX CPPFLAGS += -I$(GUIX)/include/c++ -I$(GUIX)/include/c++/x86_64-unknown-linux-gnu endif @@ -158,6 +158,11 @@ static: CPPFLAGS += -static LIBS += -lgsl -lz ifdef WITH_OPENBLAS LIBS += -lopenblas + ifeq ($(SYS), OSX) + ifdef WITH_OPENBLAS + LIBS += -Wl,-L/usr/local/opt/openblas/lib + endif + endif else LIBS += -latlas -lcblas -llapack -lblas endif -- cgit v1.2.3 From 08fb31a3a6d399226c97437814d298a7bfef9e99 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 13:04:05 +0100 Subject: Fixes OSX build on Travis-CI Fixes #160 --- .travis.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a8362c0..9c5b0da 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,8 @@ script: - eval "${MATRIX_EVAL}" - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install gsl ; fi - $CXX --version - - OPENBLAS="$(brew --prefix openblas)" + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install gsl ; fi + - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then OPENBLAS="$(brew --prefix openblas)" ; fi - echo $OPENBLAS - make -j 4 CXX=$CXX WITH_OPENBLAS=1 OPENBLAS_LEGACY=1 - make OPENBLAS_LEGACY=1 fast-check -- cgit v1.2.3 From 1b5b437ed0ae3fccabc5df31b0d7d2058e9cf058 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 29 Sep 2020 14:30:17 +0100 Subject: Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index b6b6d93..f460ff1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,6 +1,7 @@ --- name: Bug report -about: Create a report to help us improve +about: 'Important: only report *bugs* on the github issue tracker. 
For support use + the mailing list on google groups' title: '' labels: '' assignees: '' -- cgit v1.2.3 From 5d4b24280e21da87814ba1b88310bb9b969c4fef Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 30 Sep 2020 08:54:29 +0100 Subject: Added information on disabling filters Relates to #234 --- doc/manual.pdf | Bin 319480 -> 272474 bytes doc/manual.tex | 15 ++++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/doc/manual.pdf b/doc/manual.pdf index 1b7dc5d..0980ea6 100644 Binary files a/doc/manual.pdf and b/doc/manual.pdf differ diff --git a/doc/manual.tex b/doc/manual.tex index 73acb60..1cf0400 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -733,21 +733,24 @@ The are a few SNP filters implemented in the software. \item Missingness. By default, SNPs with missingness below 5\% will not be included in the analysis. Use ``-miss [num]'' to change. For - example, ``-miss 0.1'' changes the threshold to 10\%. + example, ``-miss 0.1'' changes the threshold to 10\%. With + ``-miss 1.0'' the filter is disabled. \item Minor allele frequency. By default, SNPs with minor allele frequency below 1\% will not be included in the analysis. Use ``-maf [num]" to change. For example, ``-maf 0.05'' changes the threshold - to 5\%. + to 5\%. With ``-notsnp'' the filter is disabled. \item Correlation with any covariate. By default, SNPs with $r^2$ correlation with any of the covariates above 0.9999 will not be included in the analysis. Use ``-r2 [num]'' to change. For example, - ``-r2 0.999999'' changes the threshold to 0.999999. + ``-r2 0.999999'' changes the threshold to 0.999999. With ``-r2 + 1.0'' the filter is disabled. \item Hardy-Weinberg equilibrium. Use ``-hwe [num]'' to specify. For example, ``-hwe 0.001'' will filter out SNPs with Hardy-Weinberg $p$ - values below 0.001. + values below 0.001. With ``-hwe 0'' or ``--notsnp'' the filter is + disabled. \item User-defined SNP list. 
Use ``-snps [filename]'' to specify a list of SNPs to be included in the analysis. @@ -1452,7 +1455,7 @@ You can use -outdir with gemma as a bash script \end{verbatim} makes a unique temp directory where the output is stored, here -relative to $HOME, but you can take any path. +relative to \$HOME, but you can take any path. \subsection{How do I prepare the phenotype file for BSLMM?} @@ -1480,8 +1483,6 @@ prefix.prdt.txt file will match the total sample size. Please refer to the GWAS sample data set and some demo scripts included with the GEMMA source code for detailed examples. -\end{enumerate} - \clearpage \newpage -- cgit v1.2.3 From 2dcc65a01abb6ec2cb0d90346cc821d15e8d36f6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 30 Sep 2020 08:55:06 +0100 Subject: Align R2 filter with docs --- src/gemma_io.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index b4f8d39..4bcba62 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -824,7 +824,7 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, gsl_blas_ddot(genotype, genotype, &v_x); gsl_blas_ddot(Wtx, WtWiWtx, &v_w); - if (W->size2 != 1 && v_w / v_x >= r2_level) { + if (W->size2 != 1 && v_w / v_x > r2_level) { indicator_snp.push_back(0); continue; } -- cgit v1.2.3 From 1a431e1319e2ff3855f8617f446a33a3f931bad6 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 1 Oct 2020 09:24:15 +0100 Subject: Adding comments related to https://github.com/genetics-statistics/GEMMA/issues/234 --- src/gemma_io.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 4bcba62..569c79b 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -729,6 +729,7 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, cM = mapRS2cM[rs]; } + // Start on a new marker/SNP maf = 0; n_miss = 0; flag_poly = 0; @@ -765,12 +766,13 @@ bool ReadFile_geno(const string &file_geno, const 
set &setSnps, gsl_vector_set(genotype, c_idv, geno); - if (flag_poly == 0) { - geno_old = geno; - flag_poly = 2; + // going through genotypes with 0.0 < geno < 2.0 + if (flag_poly == 0) { // first init in marker + geno_old = geno; // set geno_old (double) to previous genotype + flag_poly = 2; // initialized state } if (flag_poly == 2 && geno != geno_old) { - flag_poly = 1; + flag_poly = 1; // genotypes differ } maf += geno; @@ -788,21 +790,25 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, snpInfo.push_back(sInfo); file_pos++; + // -miss flag if ((double)n_miss / (double)ni_test > miss_level) { indicator_snp.push_back(0); continue; } + // -maf flag if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) { indicator_snp.push_back(0); continue; } + // remove genotype lines that are identical to the one read before if (flag_poly != 1) { indicator_snp.push_back(0); continue; } + // -hwe flag if (hwe_level != 0 && maf_level != -1) { if (CalcHWE(n_0, n_2, n_1) < hwe_level) { indicator_snp.push_back(0); @@ -810,8 +816,8 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, } } - // Filter SNP if it is correlated with W unless W has - // only one column, of 1s. 
+ + // -r2 flag for (size_t i = 0; i < genotype->size; ++i) { if (gsl_vector_get(genotype_miss, i) == 1) { geno = maf * 2.0; @@ -824,6 +830,8 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, gsl_blas_ddot(genotype, genotype, &v_x); gsl_blas_ddot(Wtx, WtWiWtx, &v_w); + // Filter SNP if it is correlated with covariates W, unless W has + // only one column, of 1s (-r2 flag) if (W->size2 != 1 && v_w / v_x > r2_level) { indicator_snp.push_back(0); continue; @@ -1181,7 +1189,7 @@ void ReadFile_kin(const string &file_kin, vector &indicator_idv, size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0; while (getline(infile, line)) { if (i_total == ni_total) { - fail_msg("number of rows in the kinship file is larger than the number of phentypes"); + fail_msg("number of rows in the kinship file is larger than the number of phenotypes"); } if (indicator_idv[i_total] == 0) { @@ -1507,7 +1515,8 @@ bool BimbamKin(const string file_geno, const set ksnps, if (ns_test<1) write(geno,"geno mean"); // scale the genotypes - if (k_mode == 2 && geno_var != 0) { // some confusion here + if (k_mode == 2 && geno_var != 0) { // some confusion here, -gk 2 + // flag does this gsl_vector_scale(geno, 1.0 / sqrt(geno_var)); } -- cgit v1.2.3 From b298c466171006dfb1196c74d7017d0dc8e45492 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Thu, 1 Oct 2020 09:57:34 +0100 Subject: These variables are introduced in DEBUG mode --- src/gemma_io.cpp | 2 ++ src/lmm.cpp | 13 ++++++++----- src/mathfunc.cpp | 6 +++++- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 569c79b..81182f8 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -1557,6 +1557,7 @@ bool BimbamKin(const string file_geno, const set ksnps, // FIXME: the following is not so slow but appears to generate an // identical matrix + /* for (size_t i = 0; i < ni_total; ++i) { for (size_t j = 0; j < i; ++j) { double d = gsl_matrix_get(matrix_kin, j, i); @@ -1564,6 +1565,7 @@ 
bool BimbamKin(const string file_geno, const set ksnps, } } write(matrix_kin,"K rotated"); + */ // GSL is faster - and there are even faster methods // enforce_gsl(gsl_matrix_transpose(matrix_kin)); diff --git a/src/lmm.cpp b/src/lmm.cpp index 6337116..5e53fa2 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -277,9 +277,9 @@ Iterating through a dataset Hi_eval differs and Uab (last row) void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, const gsl_matrix *Uab, const gsl_vector *unused, gsl_matrix *Pab) { - - // size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // result size - // auto ni_test = Uab->size1; // inds +#if !defined NDEBUG + size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; // result size + auto ni_test = Uab->size1; // inds assert(Uab->size1 == Hi_eval->size); assert(Uab->size2 == n_index); @@ -287,6 +287,7 @@ void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, assert(Pab->size2 == n_index); assert(Hi_eval->size == ni_test); // assert(ab->size == n_index); +#endif // DEBUG // compute Hi_eval (inds) * Uab (inds x n_index) * ab (n_index) and return in Pab (cvt x n_index). @@ -593,8 +594,9 @@ $7 = 3 $8 = 6 */ - // auto Uab = p->Uab; - // auto ab = p->ab; +#if !defined NDEBUG + auto Uab = p->Uab; + auto ab = p->ab; assert(n_index == (n_cvt + 2 + 1) * (n_cvt + 2) / 2); assert(Uab->size1 == ni_test); assert(Uab->size2 == n_index); // n_cvt == 1 -> n_index == 6? @@ -606,6 +608,7 @@ $8 = 6 assert(p->e_mode == 0); assert(Hi_eval->size == ni_test); +#endif // DEBUG CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); CalcPPab(n_cvt, p->e_mode, HiHi_eval, p->Uab, p->ab, Pab, PPab); diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index aaa9431..e74a841 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -400,7 +400,11 @@ uint count_abs_small_values(const gsl_vector *v, double min) { // and the ratio of max and min but one (min is expected to be zero). 
bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) { auto t = abs_minmax(eigenvalues); - // auto absmin = get<0>(t); + +#if !defined NDEBUG + auto absmin = get<0>(t); +#endif + auto absmin1 = get<1>(t); auto absmax = get<2>(t); if (absmax/absmin1 > max_ratio) { -- cgit v1.2.3 From 9c1f593711c1d4bc5e583453268c92380df4797f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 4 Oct 2020 08:28:40 +0100 Subject: Silence means silent! --- src/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/debug.h b/src/debug.h index 25ba673..ff22756 100644 --- a/src/debug.h +++ b/src/debug.h @@ -118,7 +118,7 @@ inline void fail_msg(std::string msg) { std::raise(SIGINT); // keep stack trace for gdb } -#define info_msg(msg) cerr << "**** INFO: " << msg << "." << endl; +#define info_msg(msg) if (!is_quiet_mode()) cerr << "**** INFO: " << msg << "." << endl; #define msg(msg) info_msg(msg); #if defined NDEBUG -- cgit v1.2.3 From 2aa81393d42647f26cbdf24393163753c3e72891 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Tue, 6 Oct 2020 08:02:58 +0100 Subject: Improved some error messages --- src/gemma_io.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp index 81182f8..45d1eb4 100644 --- a/src/gemma_io.cpp +++ b/src/gemma_io.cpp @@ -414,7 +414,7 @@ bool ReadFile_pheno(const string &file_pheno, ch_ptr = strtok((char *)line.c_str(), " ,\t"); size_t i = 0; while (i < p_max) { - enforce_msg(ch_ptr,"Number of phenotypes out of range"); + enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match phenotypes in geno file"); if (mapP2c.count(i + 1) != 0) { if (strcmp(ch_ptr, "NA") == 0) { ind_pheno_row[mapP2c[i + 1]] = 0; @@ -597,7 +597,7 @@ bool ReadFile_fam(const string &file_fam, vector> &indicator_pheno, size_t i = 0; while (i < p_max) { if (mapP2c.count(i + 1) != 0) { - enforce_msg(ch_ptr,"Problem reading FAM file (phenotypes out of range)"); + enforce_msg(ch_ptr,"Problem 
reading FAM file (phenotypes do not match geno file)"); if (strcmp(ch_ptr, "NA") == 0) { ind_pheno_row[mapP2c[i + 1]] = 0; @@ -745,7 +745,7 @@ bool ReadFile_geno(const string &file_geno, const set &setSnps, if (indicator_idv[i] == 0) continue; - enforce_msg(ch_ptr,"Problem reading geno file"); + enforce_msg(ch_ptr,"Problem reading geno file (not enough genotypes in line)"); if (strcmp(ch_ptr, "NA") == 0) { gsl_vector_set(genotype_miss, c_idv, 1); n_miss++; -- cgit v1.2.3 From 64b5183d30d3abcdd0236f5033570ae901506683 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 28 Nov 2020 09:45:37 +0000 Subject: Docs: minor edit --- doc/manual.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/manual.tex b/doc/manual.tex index 1cf0400..94e686b 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -1009,8 +1009,8 @@ analyzed SNPs have an effect on phenotype, explains about 60\% of the variance in the phenotype residuals (with standard error of 3.6\%) after removing linear effects of the two covariates. The genetic and environmental variance components of the residuals are 1.43 and 0.34, -respectively. The last two lines in the log file give the coefficients -of the two covariates in the fitted linear mixed model, with standard +respectively. The last two values are the regression coefficients for +the covariates in the fitted linear mixed model, with standard errors. The first number (0.007) is the estimate of the intercept because the first column in the covariates file is a column of ones. 
-- cgit v1.2.3 From ad2d303ebf4999a9e4833beb73a4bc6f0dee4952 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 28 Nov 2020 13:25:27 +0000 Subject: Remove unused code --- RELEASE-NOTES.md | 4 +++- src/ldr.cpp | 39 --------------------------------------- src/ldr.h | 6 +++--- src/param.cpp | 2 +- 4 files changed, 7 insertions(+), 44 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 7d8b633..946774d 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -4,7 +4,9 @@ see and [commits](https://github.com/genetics-statistics/GEMMA/commits/master). -## ChangeLog v0.98.3 (2020/?) +## ChangeLog v0.98.3 (2020/11/28) + +Maintenance release * Fix Travis build with gcc 5.5 (OpenBLAS related round-offs) * Fix Travis build on OSX (brew related) diff --git a/src/ldr.cpp b/src/ldr.cpp index f70eb85..a4ef40a 100644 --- a/src/ldr.cpp +++ b/src/ldr.cpp @@ -29,7 +29,6 @@ #include #include -// #include "Eigen/Dense" #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_eigen.h" @@ -46,7 +45,6 @@ #include "param.h" using namespace std; -// using namespace Eigen; void LDR::CopyFromParam(PARAM &cPar) { a_mode = cPar.a_mode; @@ -73,40 +71,3 @@ void LDR::CopyFromParam(PARAM &cPar) { void LDR::CopyToParam(PARAM &cPar) { return; } -/* -// X is a p by n matrix. -void LDR::VB(const vector> &Xt, const gsl_matrix *W_gsl, - const gsl_vector *y_gsl) { - - // Save gsl_vector and gsl_matrix into Eigen library formats. - MatrixXd W(W_gsl->size1, W_gsl->size2); - VectorXd y(y_gsl->size); - VectorXd x_col(y_gsl->size); - - double d; - for (size_t i = 0; i < W_gsl->size1; i++) { - d = gsl_vector_get(y_gsl, i); - y(i) = d; - for (size_t j = 0; j < W_gsl->size2; j++) { - W(i, j) = gsl_matrix_get(W_gsl, i, j); - } - } - - // Initial VB values by lm. - cout << indicator_snp[0] << " " << indicator_snp[1] << " " << indicator_snp[2] - << endl; - uchar_matrix_get_row(Xt, 0, x_col); - - for (size_t j = 0; j < 10; j++) { - cout << x_col(j) << endl; - } - - // Run VB iterations. 
- // TO DO. - - // Save results. - // TO DO. - - return; -} -*/ diff --git a/src/ldr.h b/src/ldr.h index 6720689..629b064 100644 --- a/src/ldr.h +++ b/src/ldr.h @@ -20,8 +20,8 @@ #define __LDR_H__ #include "param.h" -#include -#include +// #include +// #include #include #include @@ -56,7 +56,7 @@ public: vector snpInfo; // Record SNP information. // Not included in PARAM. - gsl_rng *gsl_r; + // gsl_rng *gsl_r; // Main functions. void CopyFromParam(PARAM &cPar); diff --git a/src/param.cpp b/src/param.cpp index cc4290c..e4781c4 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -1961,7 +1961,7 @@ void PARAM::CheckCvt() { return; } -// Post-process phentoypes and covariates. +// Post-process phenotypes and covariates. void PARAM::ProcessCvtPhen() { // Convert indicator_pheno to indicator_idv. -- cgit v1.2.3 From 4a56c11c95e9f13670c906b353fe9360344eb913 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 28 Nov 2020 14:39:05 +0000 Subject: Sane random generator handling --- src/bslmm.cpp | 33 +++------------------------------ src/bslmm.h | 7 ++++--- src/bslmmdap.h | 4 ++-- src/param.cpp | 42 ++++++++++++++++++++++++++---------------- src/param.h | 7 +++++-- 5 files changed, 40 insertions(+), 53 deletions(-) diff --git a/src/bslmm.cpp b/src/bslmm.cpp index 3305639..eb961e9 100644 --- a/src/bslmm.cpp +++ b/src/bslmm.cpp @@ -84,7 +84,8 @@ void BSLMM::CopyFromParam(PARAM &cPar) { w_pace = cPar.w_pace; n_mh = cPar.n_mh; geo_mean = cPar.geo_mean; - randseed = cPar.randseed; + // randseed = cPar.randseed; + gsl_r = cPar.gsl_r; trace_G = cPar.trace_G; ni_total = cPar.ni_total; @@ -107,7 +108,7 @@ void BSLMM::CopyToParam(PARAM &cPar) { cPar.cHyp_initial = cHyp_initial; cPar.n_accept = n_accept; cPar.pheno_mean = pheno_mean; - cPar.randseed = randseed; + // cPar.randseed = randseed; return; } @@ -938,19 +939,6 @@ void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX, // Calculate proposal distribution for gamma (unnormalized), // and set up gsl_r and gsl_t. 
- gsl_rng_env_setup(); - const gsl_rng_type *gslType; - gslType = gsl_rng_default; - if (randseed < 0) { - time_t rawtime; - time(&rawtime); - tm *ptm = gmtime(&rawtime); - - randseed = - (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); double *p_gamma = new double[ns_test]; CalcPgamma(p_gamma); @@ -1643,21 +1631,6 @@ void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) { } // Calculate proposal distribution for gamma (unnormalized), - // and set up gsl_r and gsl_t. - gsl_rng_env_setup(); - const gsl_rng_type *gslType; - gslType = gsl_rng_default; - if (randseed < 0) { - time_t rawtime; - time(&rawtime); - tm *ptm = gmtime(&rawtime); - - randseed = - (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - double *p_gamma = new double[ns_test]; CalcPgamma(p_gamma); diff --git a/src/bslmm.h b/src/bslmm.h index d2dadbf..93dadf9 100644 --- a/src/bslmm.h +++ b/src/bslmm.h @@ -20,7 +20,7 @@ #define __BSLMM_H__ #include -#include +// #include #include #include @@ -60,7 +60,8 @@ public: size_t n_accept; // Number of acceptances. size_t n_mh; // Number of MH steps per iter. double geo_mean; // Mean of geometric dist. - long int randseed; + // long int randseed; + gsl_rng *gsl_r; // Track randomizer state double trace_G; HYPBSLMM cHyp_initial; @@ -88,7 +89,7 @@ public: vector snpInfo; // Not included in PARAM. 
- gsl_rng *gsl_r; + // gsl_rng *gsl_r; gsl_ran_discrete_t *gsl_t; map mapRank2pos; diff --git a/src/bslmmdap.h b/src/bslmmdap.h index dc05e34..0f560c2 100644 --- a/src/bslmmdap.h +++ b/src/bslmmdap.h @@ -21,7 +21,7 @@ #include "param.h" #include -#include +// #include #include #include @@ -44,7 +44,7 @@ public: double pheno_mean; // BSLMM MCMC related parameters - long int randseed; + // long int randseed; double trace_G; HYPBSLMM cHyp_initial; diff --git a/src/param.cpp b/src/param.cpp index e4781c4..db6d8d5 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -106,6 +106,10 @@ PARAM::PARAM(void) time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {} +PARAM::~PARAM() { + gsl_rng_free(gsl_r); +} + // Read files: obtain ns_total, ng_total, ns_test, ni_test. void PARAM::ReadFiles(void) { string file_str; @@ -817,6 +821,28 @@ void PARAM::CheckParam(void) { flag++; } + // Always set up random environment. + gsl_rng_env_setup(); // sets gsl_rng_default_seed + const gsl_rng_type *T = gsl_rng_default; // pick up environment GSL_RNG_SEED + + if (randseed >= 0) + gsl_rng_default_seed = randseed; // CLI option used + else if (gsl_rng_default_seed == 0) { // by default we will randomize the seed + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + gsl_rng_default_seed = + (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(T); + + if (is_debug_mode()) { + printf ("GSL random generator type: %s; ", gsl_rng_name (gsl_r)); + printf ("seed = %lu (option %li); ", gsl_rng_default_seed, randseed); + printf ("first value = %lu\n", gsl_rng_get (gsl_r)); + } + if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 && a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 && a_mode != 63 && a_mode != 66 && a_mode != 67) { @@ -2015,22 +2041,6 @@ void PARAM::ProcessCvtPhen() { << "analyzed individuals. " << endl; } else { - // Set up random environment. 
- gsl_rng_env_setup(); - gsl_rng *gsl_r; - const gsl_rng_type *gslType; - gslType = gsl_rng_default; - if (randseed < 0) { - time_t rawtime; - time(&rawtime); - tm *ptm = gmtime(&rawtime); - - randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + - ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - // From ni_test, sub-sample ni_subsample. vector a, b; for (size_t i = 0; i < ni_subsample; i++) { diff --git a/src/param.h b/src/param.h index 9ad14b2..eb2cef7 100644 --- a/src/param.h +++ b/src/param.h @@ -23,6 +23,7 @@ #include "debug.h" #include "gsl/gsl_matrix.h" +#include #include "gsl/gsl_vector.h" #include #include @@ -211,7 +212,8 @@ public: size_t n_accept; // Number of acceptance. size_t n_mh; // # MH steps in each iter. double geo_mean; // Mean of geometric dist. - long int randseed; + long int randseed; // holds -seed parameter + gsl_rng *gsl_r; // Track the randomizer double trace_G; HYPBSLMM cHyp_initial; @@ -324,8 +326,9 @@ public: set setKSnps; // Set of snps for K (-ksnps and LOCO) set setGWASnps; // Set of snps for GWA (-gwasnps and LOCO) - // Constructor. + // Constructor and destructor PARAM(); + ~PARAM(); // Functions. void ReadFiles(); -- cgit v1.2.3 From 58f1ca7e072207b446d7389ced3427284c414a9c Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 28 Nov 2020 14:46:44 +0000 Subject: Manual: Remove information on gemmaf since it no longer applies. --- doc/manual.tex | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/doc/manual.tex b/doc/manual.tex index 94e686b..555d766 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -321,15 +321,6 @@ installing GSL library, please refer to \url{http://www.gnu.org/s/gsl/}. For details on installing LAPACK library, please refer to \url{http://www.netlib.org/lapack/}. 
-If you are interested in fitting BSLMM for a large scale GWAS data set -but have limited memory to store the entire genotype matrix, you could -compile GEMMA in float precision. A float precision binary executable, -named ``gemmaf", is available inside the ``bin" folder in the source -code. To compile a float precision binary by yourself, you can first -run ``d2f.sh" script inside the ``src" folder, and then enable -``FORCE\_FLOAT" option in the Makefile. The float version could save -about half of the memory without appreciable loss of accuracy. - \newpage \section{Input File Formats} @@ -1123,8 +1114,7 @@ compressed format. Notice that a large memory is needed to fit BSLMM (e.g. may need 20 GB for a data set with 4000 individuals and 400,000 SNPs), because the software has to store the whole genotype matrix in the physical -memory. The float version (gemmaf) can be used to save about half of -the memory requirement without noticeable loss of accuracy. +memory. In default, GEMMA does not require the user to provide a relatedness matrix explicitly. 
It internally calculates and uses the centered -- cgit v1.2.3 From c92d8f64d1326ace41a9b5400641e78a8420fa09 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 28 Nov 2020 15:09:05 +0000 Subject: Prepare for release --- RELEASE-NOTES.md | 11 ++++++++++- src/version.h | 4 ++-- test/dev_test_suite.sh | 1 + test/lengthy_test_suite.sh | 1 + test/test_suite.sh | 5 +++-- 5 files changed, 17 insertions(+), 5 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 946774d..36243c1 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -11,6 +11,15 @@ Maintenance release * Fix Travis build with gcc 5.5 (OpenBLAS related round-offs) * Fix Travis build on OSX (brew related) * GEMMA installs on FreeBSD (thanks @outpaddling) +* Added github issue templates to ascertain the github issue + tracker is only used for reporting bugs +* Added more debug output creating the GRM +* Remove info on the floating point version (gemmaf). +* Sane randomization handling: GEMMA now honours the -seed option + (mostly for bslmm). It also allows GSL_RNG_SEED and GSL_RNG_TYPE to + be used. See the + [docs](https://www.gnu.org/software/gsl/doc/html/rng.html). 
+* The tests now use a fixed seed for the randomizer ## ChangeLog v0.98.2 (2019/05/28) @@ -24,7 +33,7 @@ GCC 10.1 fix release Bug fix release -* Fixes regression on Plink analysis with missing data (thank you @voichek) +* Fixes regression on Plink analysis with missing data #188 (thank you @voichek) To install the image, download and diff --git a/src/version.h b/src/version.h index 6c22a1b..6492b1a 100644 --- a/src/version.h +++ b/src/version.h @@ -1,5 +1,5 @@ // version.h generated by GEMMA scripts/gen_version_info.sh #define GEMMA_VERSION "0.98.3" -#define GEMMA_DATE "2020-09-29" +#define GEMMA_DATE "2020-11-28" #define GEMMA_YEAR "2020" -#define GEMMA_PROFILE "/gnu/store/a7a35vv75zk9k23k8ws4v2wrs123dln1-profile" +#define GEMMA_PROFILE "" diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh index 0e36218..903c72f 100755 --- a/test/dev_test_suite.sh +++ b/test/dev_test_suite.sh @@ -3,6 +3,7 @@ gemma=../bin/gemma # gemmaopts="-debug -strict" gemmaopts="-debug -check" +export GSL_RNG_SEED=100 testLinearModel() { $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \ diff --git a/test/lengthy_test_suite.sh b/test/lengthy_test_suite.sh index 231475e..6e93220 100755 --- a/test/lengthy_test_suite.sh +++ b/test/lengthy_test_suite.sh @@ -3,6 +3,7 @@ # Long running tests go here gemma=../bin/gemma +export GSL_RNG_SEED=100 testPlinkStandardRelatednessMatrixK() { testname=testPlinkStandardRelatednessMatrixK diff --git a/test/test_suite.sh b/test/test_suite.sh index a598cd2..cc244c0 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -2,6 +2,7 @@ gemma=../bin/gemma gemmaopts="-debug" +export GSL_RNG_SEED=10 testBslmm1() { outn=mouse_hs1940_CD8_bslmm @@ -72,10 +73,10 @@ testBslmm5() { -epm ./output/mouse_hs1940_CD8_bslmm_cc1.param.txt \ -emu ./output/mouse_hs1940_CD8_bslmm_cc1.log.txt \ -predict \ - -o $outn + -o $outn -seed 100 assertEquals 0 $? 
outfn=output/$outn.prdt.txt - assertEquals "550.67" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals "571.08" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testCenteredRelatednessMatrixKFullLOCO1() { -- cgit v1.2.3 From a28c76eca4afbfefd142227c348555faa853642b Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sat, 28 Nov 2020 15:22:41 +0000 Subject: Checked the timings; Travis passes for Linux and MacOS --- test/performance/releases.org | 47 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/test/performance/releases.org b/test/performance/releases.org index 1be5818..79d830b 100644 --- a/test/performance/releases.org +++ b/test/performance/releases.org @@ -90,6 +90,48 @@ trix const*, gsl_matrix*, gsl_matrix*) onst*) #+END_SRC +* GEMMA 0.98.3 (release) + +#+begin_src sh +time ./bin/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -a ./example/mouse_hs1940.anno.txt -gk -no-check + +GEMMA 0.98.3 (2020-11-28) by Xiang Zhou and team (C) 2012-2020 +Reading Files ... +## number of total individuals = 1940 +## number of analyzed individuals = 1410 +## number of covariates = 1 +## number of phenotypes = 1 +## number of total SNPs/var = 12226 +## number of analyzed SNPs = 10768 +Calculating Relatedness Matrix ... +================================================== 100% + +real 0m7.068s +user 0m14.904s +sys 0m1.454s + +time ./bin/gemma -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -n 1 -a ./example/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check + +GEMMA 0.98.3 (2020-11-28) by Xiang Zhou and team (C) 2012-2020 +Reading Files ... 
+## number of total individuals = 1940 +## number of analyzed individuals = 1410 +## number of covariates = 1 +## number of phenotypes = 1 +## number of total SNPs/var = 12226 +## number of analyzed SNPs = 10768 +Start Eigen-Decomposition... +pve estimate =0.608801 +se(pve) =0.032774 +================================================== 100% + +real 0m12.581s +user 0m17.318s +sys 0m2.079s +#+end_src + + + * GEMMA 0.98.2 (release) Looks like openblas is getting faster. Two metrics on the same machine: @@ -106,13 +148,13 @@ Reading Files ... ## number of analyzed SNPs = 10768 Calculating Relatedness Matrix ... ================================================== 100% -**** INFO: Done. real 0m7.635s user 0m14.821s sys 0m1.077s #+END_SRC +The static version #+BEGIN_SRC sh lario:~/iwrk/opensource/code/genetics/gemma$ time ./bin/gemma-0.98-linux-static -g ./example/mouse_hs1940.geno.txt.gz -p ./example/mouse_hs1940.pheno.txt -a ./example/mouse_hs1940.anno.txt -gk -no-check @@ -126,7 +168,6 @@ Reading Files ... ## number of analyzed SNPs = 10768 Calculating Relatedness Matrix ... ================================================== 100% -**** INFO: Done. real 0m10.663s user 0m20.994s @@ -151,7 +192,6 @@ Reading Files ... ## number of analyzed SNPs = 10768 Calculating Relatedness Matrix ... ================================================== 100% -**** INFO: Done. real 0m7.590s user 0m30.392s @@ -180,7 +220,6 @@ Start Eigen-Decomposition... pve estimate =0.608801 se(pve) =0.032774 ================================================== 100% -**** INFO: Done. 
real 0m17.813s user 0m43.460s -- cgit v1.2.3 From 72dbc476025fa932232beb79e28555f7bac6e7db Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 08:48:05 +0000 Subject: Travis: trying arm64 build --- .travis.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.travis.yml b/.travis.yml index 9c5b0da..ead5d29 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,6 +2,9 @@ language: C++ matrix: include: - os: linux + arch: + - amd64 + - arm64 compiler: gcc addons: apt: -- cgit v1.2.3 From 1a403e059409b7b88feb4b4003002d1288b1f876 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 08:49:43 +0000 Subject: README and RELEASE-NOTES --- README.md | 19 ++++++++++--------- RELEASE-NOTES.md | 9 +++++++++ 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 97ef042..68e2d05 100644 --- a/README.md +++ b/README.md @@ -9,22 +9,23 @@ GEMMA is a software toolkit for fast application of linear mixed models (LMMs) and related models to genome-wide association studies (GWAS) and other large-scale data sets. -Check out [NEWS.md](NEWS.md) to see what's new in each GEMMA release. +Check out [RELEASE-NOTES.md](RELEASE-NOTES.md) to see what's new in +each GEMMA release. -Please post feature requests or suspected bugs to +Please post suspected bugs to [Github issues](https://github.com/genetics-statistics/GEMMA/issues). For questions or other discussion, please post to the [GEMMA Google Group](https://groups.google.com/group/gemma-discussion). We also encourage contributions, for example, by forking the repository, making your changes to the code, and issuing a pull request. -Currently, GEMMA is supported for 64-bit Mac OS X and Linux -platforms. *Windows is not currently supported.* though you can -run GEMMA in a Linux VM or [container](https://docs.docker.com/docker-for-windows/). 
If you are interested -in helping to make GEMMA available on Windows platforms (e.g., by -providing installation instructions for Windows, or by contributing -Windows binaries) please post a note in the -[Github issues](https://github.com/genetics-statistics/GEMMA/issues). +Currently, GEMMA provides a runnable Docker container for 64-bit +MacOS, Windows and Linux platforms. GEMMA can be installed with +Debian, Conda, Homebrew and GNU Guix. With Guix you find the latest +version +[here](http://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics) +as it is the version we use every day on http://genenetwork.org. For +installation instructions see also [INSTALL.md](INSTALL.md). *(The above image depicts physiological and behavioral trait loci identified in CFW mice using GEMMA, from [Parker et al, Nature diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 36243c1..7fa72cf 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -21,6 +21,15 @@ Maintenance release [docs](https://www.gnu.org/software/gsl/doc/html/rng.html). 
* The tests now use a fixed seed for the randomizer +A docker binary that runs on Linux, MacOS and Windows can be downloaded from + +http://ipfs.genenetwork.org/ipfs/Qmaq1q73ox53ykKdRF6tYDXL9bEKJQfnGCqBxFdo1fcYPb/gemma-0.98.3-AMD64-Guix-docker-release.tgz + +After loading the image into Docker, run with something like + + docker run -w /run -v ${PWD}:/run ed5bf7499691 gemma -gk -bfile example/mouse_hs1940 + + ## ChangeLog v0.98.2 (2019/05/28) GCC 10.1 fix release -- cgit v1.2.3 From 4637f63c986d40b5aa8d53d053d3b3fa222809de Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 08:52:27 +0000 Subject: Travis --- .travis.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index ead5d29..b47556a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ matrix: - os: linux arch: - amd64 - - arm64 compiler: gcc addons: apt: @@ -24,6 +23,17 @@ matrix: - libopenblas-dev - os: osx compiler: clang + - os: linux + arch: + - arm64 + compiler: gcc + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - libgsl-dev + - libopenblas-dev script: - echo $MATRIX_EVAL - eval "${MATRIX_EVAL}" -- cgit v1.2.3 From 3130e3d48e27901f516675f49d4b0f0913798bb8 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 09:16:46 +0000 Subject: Updating README and manual --- README.md | 50 +++++++++++++++++++++++++------------------------- doc/manual.tex | 21 +++++++-------------- 2 files changed, 32 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 68e2d05..b7a2bb8 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,9 @@ Debian, Conda, Homebrew and GNU Guix. With Guix you find the latest version [here](http://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics) as it is the version we use every day on http://genenetwork.org. For -installation instructions see also [INSTALL.md](INSTALL.md). +installation instructions see also [INSTALL.md](INSTALL.md).
We use +continuous integration builds on Travis-CI for Linux (amd64 & arm64) +and MacOS (amd64). *(The above image depicts physiological and behavioral trait loci identified in CFW mice using GEMMA, from [Parker et al, Nature @@ -51,19 +53,19 @@ Genetics, 2016](https://doi.org/10.1038/ng.3609).) 1. Fast assocation tests implemented using the univariate linear mixed model (LMM). In GWAS, this can correct for population structure and -sample nonexchangeability. It also provides estimates of the +sample non-exchangeability. It also provides estimates of the proportion of variance in phenotypes explained by available genotypes (PVE), often called "chip heritability" or "SNP heritability". 2. Fast association tests for multiple phenotypes implemented using a multivariate linear mixed model (mvLMM). In GWAS, this can correct for -populations tructure and sample nonexchangeability jointly in multiple -complex phenotypes. +population structure and sample (non)exchangeability - jointly in +multiple complex phenotypes. 3. Bayesian sparse linear mixed model (BSLMM) for estimating PVE, phenotype prediction, and multi-marker modeling in GWAS. -4. Estimation of variance components ("chip heritability") partitioned +4. Estimation of variance components ("chip/SNP heritability") partitioned by different SNP functional categories from raw (individual-level) data or summary data. For raw data, HE regression or the REML AI algorithm can be used to estimate variance components when @@ -74,7 +76,8 @@ MQS algorithm to estimate variance components. To install GEMMA you can -1. Download the precompiled binaries (64-bit Linux and Mac only) +1. Download the precompiled or Docker binaries + from [releases](https://github.com/genetics-statistics/GEMMA/releases). 2. Use existing package managers, see [INSTALL.md](INSTALL.md). @@ -89,20 +92,16 @@ numerical libraries. 1. Fetch the [latest stable release][latest_release] and download the file appropriate for your platform. -2.
For .tar.bz2 files unpack the tar ball +2. For Docker images, install Docker, load the image into Docker and + run with something like - tar xvjf gemma-$version-installer.tar.bz2 - - run the installer - - ./install.sh ~/gemma - - and run gemma - - ~/gemma/bin/gemma + docker run -w /run -v ${PWD}:/run ed5bf7499691 gemma -gk -bfile example/mouse_hs1940 3. For .gz files run `gunzip gemma.linux.gz` or `gunzip -gemma.linux.gz` to unpack the file. +gemma.linux.gz` to unpack the file. And make sure it is executable with + + chmod u+x gemma-linux + ./gemma-linux ## Run GEMMA @@ -132,24 +131,25 @@ Above example files can be downloaded from GEMMA has a wide range of debugging options which can be viewed with ``` -gemma -h 14 - DEBUG OPTIONS + -check enable checks (slower) -no-fpe-check disable hardware floating point checking -strict strict mode will stop when there is a problem -silence silent terminal display -debug debug output -debug-data debug data output + -nind [num] read up to num individuals + -issue [num] enable tests relevant to issue tracker -legacy run gemma in legacy mode ``` -typically when running gemma you should use -debug which includes relevant -checks. +typically when running gemma you should use -debug which includes +relevant checks. When compiled for debugging the debug version of +GEMMA gives more information. -For performances you may want to use the -no-check option -instead. Also check the build optimization notes in -[INSTALL.md](INSTALL.md). +For performance you may want to use the -no-check option. Also check +the build optimization notes in [INSTALL.md](INSTALL.md). ## Help @@ -192,7 +192,7 @@ studies.](https://doi.org/10.1101/042846) *Annals of Applied Statistics*, in pre ## License -Copyright (C) 2012–2018, Xiang Zhou and team. +Copyright (C) 2012–2020, Xiang Zhou and team. 
The *GEMMA* source code repository is free software: you can redistribute it under the terms of the diff --git a/doc/manual.tex b/doc/manual.tex index 555d766..dc0aadf 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -75,7 +75,7 @@ association studies (GWAS). It fits a univariate linear mixed model (LMM) for marker association tests with a single phenotype to account for population stratification and sample structure, and for estimating the proportion of variance in phenotypes explained (PVE) by typed -genotypes (i.e. "chip heritability") \cite{Zhou:2012}. It fits a +genotypes (i.e. ``chip heritability'' or ``SNP heritability'') \cite{Zhou:2012}. It fits a multivariate linear mixed model (mvLMM) for testing marker associations with multiple phenotypes simultaneously while controlling for population stratification, and for estimating genetic correlations @@ -139,8 +139,8 @@ score). GEMMA obtains either the maximum likelihood estimate (MLE) or the restricted maximum likelihood estimate (REML) of $\lambda$ and $\beta$, and outputs the corresponding $p$ value. -In addition, GEMMA estimates the PVE by typed genotypes or ``chip -heritability". +In addition, GEMMA estimates the PVE by typed genotypes or ``chip or +SNP heritability''. \subsubsection{Multivariate Linear Mixed Model} GEMMA can fit a multivariate linear mixed model in the following form: @@ -307,19 +307,12 @@ platform. The binary executable of GEMMA works well for a reasonably large number of individuals (say, for example, the ``-eigen " option works -for at least 45,000 individuals). Due to the outdated computation -environment the software was compiled on, however, for larger sample -size and for improved computation efficiency, it is recommended to -compile GEMMA on user's own modern computer system. +for at least 45,000 individuals). 
If you want to compile GEMMA by yourself, you will need to download the source code, and you will need a standard C/C++ compiler such as -GNU gcc, as well as the GSL and LAPACK libraries. You will need to -change the library paths in the Makefile accordingly. A sample -Makefile is provided along with the source code. For details on -installing GSL library, please refer to -\url{http://www.gnu.org/s/gsl/}. For details on installing LAPACK -library, please refer to \url{http://www.netlib.org/lapack/}. +GNU gcc, as well as GSL and OpenBLAS libraries. A sample +Makefile is provided along with the source code. \newpage @@ -334,7 +327,7 @@ genotypes and using BIMBAM files for phenotypes) will result in unwanted errors. BIMBAM format is particularly useful for imputed genotypes, as PLINK codes genotypes using 0/1/2, while BIMBAM can accommodate any real values between 0 and 2 (and any real values if -paired with ``-notsnp" option). In addition, to estimate variance +paired with ``-notsnp'' option). In addition, to estimate variance components using summary statistics, GEMMA requires two other input files: one contains marginal z-scores and the other contains SNP category. -- cgit v1.2.3 From 16d491f215ce894ff046755bf84e17a184db48b1 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 09:26:16 +0000 Subject: Update INSTALL doc --- INSTALL.md | 55 +++++++++++++++++++++---------------------------------- 1 file changed, 21 insertions(+), 34 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 8aa7e78..ef023ab 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -10,8 +10,8 @@ and it should give you the version. ## GEMMA dependencies -GEMMA runs on Linux and MAC OSX and the runtime has the following -dependencies: +GEMMA runs on Linux, MAC OSX and Windows (with Docker). 
The runtime +has the following dependencies: * C++ tool chain >= 5.5.0 (see Travis CI and we test with file .guix-dev-gcc-older) * GNU Science library (GSL) 2.x (GEMMA dropped support for GSL 1.x) @@ -52,41 +52,32 @@ using the following command guix package -i gemma -To build GEMMA from source you can opt to install the build tools with GNU Guix +A more recent version may be found in the guix-bioinformatics channel +which is maintained by the authors. See the +[README](http://git.genenetwork.org/guix-bioinformatics/guix-bioinformatics), e.g. - guix package -i make gcc linux-libre-headers gsl openblas lapack glibc ld-wrapper + env GUIX_PACKAGE_PATH=./guix-bioinformatics guix package -A gemma -The current build container is in [guix-dev](../guix-dev) +To build GEMMA from source you can opt to install the build tools with +GNU Guix, the current build container is in [guix-dev](./.guix-dev) - guix environment -C guix --ad-hoc gcc-toolchain gdb gsl openblas zlib bash ld-wrapper perl vim which + source .guix-dev + make -To build with an older gcc, for example: +Guix allows for easy versioning. To build with an older gcc, for +example: guix environment -C guix --ad-hoc gcc-toolchain@9.3.0 gdb gsl openblas zlib bash ld-wrapper perl vim which ### Install from source -Note: Eigen is no longer required! - -Install listed dependencies and run +Install listed dependencies (you may want to take hints from +the Travis-CI [tests](./.travis.yml)) and run make -j 4 (the -j switch builds on 4 cores). -if you get an Eigen error you may need to override the include -path. E.g. 
to build GEMMA on GNU Guix with shared libs the following -may work - - make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1 - -another example overriding optimization and LIB flags (so as to link -against gslv1) would be - - make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1 GCC_FLAGS="-Wall" LIBS="$HOME/opt/gsl1/lib/libgsl.a $HOME/opt/gsl1/lib/libgslcblas.a -L$HOME/.guix-profile/lib -pthread -llapack -lblas -lz" - -to run GEMMA tests - time make check You can run gemma in the debugger with, for example @@ -99,27 +90,23 @@ You can run gemma in the debugger with, for example Note that if you get warnings on inspecting variables you should compile with GCC_FLAGS="" to disable optimizations (-O3). E.g. - make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1 GCC_FLAGS= + make WITH_OPENBLAS=1 GCC_FLAGS= Other options, such as compiling with warnings, are listed in the Makefile. ### GNU Guix commands used -With git bisect build the older versions of gemma with openblas +Some development examples. 
With git bisect build the older versions +of gemma with openblas - ~/.config/guix/current/bin/guix environment -C guix --ad-hoc gcc gdb gfortran:lib gsl eigen lapack openblas zlib bash ld-wrapper perl ldc - make clean ; make EIGEN_INCLUDE_PATH=$GUIX_ENVIRONMENT/include/eigen3 WITH_OPENBLAS=1 FORCE_DYNAMIC=1 -j 8 + ~/.config/guix/current/bin/guix environment -C guix --ad-hoc gcc gdb gfortran:lib gsl lapack openblas zlib bash ld-wrapper perl ldc + make clean ; make WITH_OPENBLAS=1 FORCE_DYNAMIC=1 -j 8 or with atlas - ~/.config/guix/current/bin/guix environment -C guix --ad-hoc gcc gdb gfortran:lib gsl eigen lapack atlas zlib bash ld-wrapper perl ldc - make clean ; make EIGEN_INCLUDE_PATH=$GUIX_ENVIRONMENT/include/eigen3/Eigen/ WITH_OPENBLAS= FORCE_DYNAMIC=1 -j 25 - -You may need to symlink Eigen in some older versions - - ln -s $GUIX_ENVIRONMENT/include/eigen3/Eigen src/Eigen - + ~/.config/guix/current/bin/guix environment -C guix --ad-hoc gcc gdb gfortran:lib gsl lapack atlas zlib bash ld-wrapper perl ldc + make clean ; make WITH_OPENBLAS= FORCE_DYNAMIC=1 -j 25 ## Run tests -- cgit v1.2.3 From d5b05dfb0022dbf32658a7249462341a3d452ac3 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 09:27:27 +0000 Subject: Docker hint --- INSTALL.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index ef023ab..87066b6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -69,6 +69,11 @@ example: guix environment -C guix --ad-hoc gcc-toolchain@9.3.0 gdb gsl openblas zlib bash ld-wrapper perl vim which +### Install with Docker + +Recent versions of GEMMA come with a 64-bit Docker image that should run +on Linux, Windows and MacOS.
+ ### Install from source Install listed dependencies (you may want to take hints from -- cgit v1.2.3 From ca54558f2bb55434a334255baf8d495420e8387f Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 29 Nov 2020 09:50:16 +0000 Subject: Regenerated PDF --- doc/manual.pdf | Bin 272474 -> 241286 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/doc/manual.pdf b/doc/manual.pdf index 0980ea6..deef12d 100644 Binary files a/doc/manual.pdf and b/doc/manual.pdf differ -- cgit v1.2.3