diff options
-rw-r--r-- | Makefile | 44 | ||||
-rw-r--r-- | README.md | 9 | ||||
-rw-r--r-- | doc/compile_GEMMA_win64.txt | 48 | ||||
-rw-r--r-- | doc/manual.pdf | bin | 269308 -> 319480 bytes | |||
-rw-r--r-- | doc/manual.tex | 4 | ||||
-rw-r--r-- | example/demo.txt | 4 | ||||
-rw-r--r-- | scripts/gen_version_info.cmd | 16 | ||||
-rw-r--r-- | src/bslmmdap.cpp | 2 | ||||
-rw-r--r-- | src/gemma.cpp | 2 | ||||
-rw-r--r-- | src/gemma_io.cpp (renamed from src/io.cpp) | 2 | ||||
-rw-r--r-- | src/gemma_io.h (renamed from src/io.h) | 0 | ||||
-rw-r--r-- | src/lm.h | 2 | ||||
-rw-r--r-- | src/lmm.cpp | 2 | ||||
-rw-r--r-- | src/lmm.h | 2 | ||||
-rw-r--r-- | src/main.cpp | 6 | ||||
-rw-r--r-- | src/mvlmm.cpp | 2 | ||||
-rw-r--r-- | src/mvlmm.h | 2 | ||||
-rw-r--r-- | src/param.cpp | 4 | ||||
-rw-r--r-- | src/prdt.cpp | 3 | ||||
-rw-r--r-- | src/varcov.cpp | 2 | ||||
-rw-r--r-- | src/varcov.h | 2 | ||||
-rw-r--r-- | src/vc.cpp | 2 | ||||
-rw-r--r-- | src/vc.h | 2 |
23 files changed, 133 insertions, 29 deletions
@@ -41,7 +41,19 @@ GEMMA_VERSION = $(shell cat ./VERSION) # Set this variable to either LNX or MAC -SYS = LNX # LNX|MAC (Linux is the default) +ifeq ($(OS),Windows_NT) + SYS = WIN + VGEN = scripts/gen_version_info.cmd +else + UNAME_S := $(shell uname -s) + ifeq ($(UNAME_S),Darwin) + SYS = MAC + else + SYS = LNX # default to linux + endif + VGEN = scripts/gen_version_info.sh +endif + # Leave blank after "=" to disable; put "= 1" to enable DIST_NAME = gemma-$(GEMMA_VERSION) DEBUG = 1 # DEBUG mode, set DEBUG=0 for a release @@ -53,8 +65,20 @@ OPENBLAS_LEGACY = # Using older OpenBlas FORCE_STATIC = # Static linking of libraries GCC_FLAGS = -Wall -O3 -std=gnu++11 # extra flags -Wl,--allow-multiple-definition TRAVIS_CI = # used by TRAVIS for testing -EIGEN_INCLUDE_PATH = /usr/include/eigen3 -OPENBLAS_INCLUDE_PATH = /usr/local/opt/openblas/include + +GSL_INCLUDE_PATH = +ifeq ($(SYS), WIN) + GSL_INCLUDE_PATH = -isystemc:/MinGW/include -LC:/MinGW/lib + EIGEN_INCLUDE_PATH = ../eigen-git-mirror + OPENBLAS_INCLUDE_PATH = ../OpenBLAS-v0.2.19-Win64-int32/include -L../OpenBLAS-v0.2.19-Win64-int32/lib +else + OPENBLAS_INCLUDE_PATH = /usr/local/opt/openblas/include + ifeq ($(SYS), MAC) + EIGEN_INCLUDE_PATH = /usr/local/include/eigen3 + else + EIGEN_INCLUDE_PATH = /usr/include/eigen3 + endif +endif # -------------------------------------------------------------------- # Edit below this line with caution @@ -73,13 +97,13 @@ endif ifeq ($(CPP), clang++) # macOS Homebrew settings (as used on Travis-CI) - GCC_FLAGS=-O3 -std=c++11 -stdlib=libc++ -isystem/$(OPENBLAS_INCLUDE_PATH) -isystem//usr/local/include/eigen3 -Wl,-L/usr/local/opt/openblas/lib + GCC_FLAGS=-O3 -std=c++11 -stdlib=libc++ -isystem$(OPENBLAS_INCLUDE_PATH) -isystem$(EIGEN_INCLUDE_PATH) -Wl,-L/usr/local/opt/openblas/lib endif ifdef WITH_OPENBLAS OPENBLAS=1 # WITH_LAPACK = # OPENBLAS usually includes LAPACK - CPPFLAGS += -DOPENBLAS -isystem/$(OPENBLAS_INCLUDE_PATH) + CPPFLAGS += -DOPENBLAS -isystem$(OPENBLAS_INCLUDE_PATH) 
ifdef OPENBLAS_LEGACY # Legacy version (mostly for Travis-CI) CPPFLAGS += -DOPENBLAS_LEGACY @@ -87,10 +111,14 @@ ifdef WITH_OPENBLAS endif ifdef DEBUG - CPPFLAGS += -g $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc + CPPFLAGS += -g $(GCC_FLAGS) $(GSL_INCLUDE_PATH) -isystem$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc else # release mode - CPPFLAGS += -DNDEBUG $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc + CPPFLAGS += -DNDEBUG $(GCC_FLAGS) $(GSL_INCLUDE_PATH) -isystem$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc +endif + +ifeq ($(SYS), WIN) + CPPFLAGS += -Duint="unsigned int" -D__CRT__NO_INLINE -D__STRING="__STRINGIFY" -DWINDOWS -DWITH_GSLCBLAS=1 endif ifdef SHOW_COMPILER_WARNINGS @@ -149,7 +177,7 @@ OBJS = $(SOURCES:.cpp=.o) all: $(OUTPUT) ./src/version.h: - ./scripts/gen_version_info.sh > src/version.h + $(VGEN) > src/version.h $(OUTPUT): $(OBJS) $(CPP) $(CPPFLAGS) $(OBJS) $(LIBS) -o $(OUTPUT) @@ -88,11 +88,14 @@ numerical libraries. gemma.linux.gz` to unpack the file. -## Quick start +## Help materials -1. Work through the demo. *Give more details here.* ++ [The GEMMA manual](doc/manual.pdf). -2. Read the manual and run `gemma -h`. *Give more details here.* ++ [Detailed example with HS mouse data](example/demo.txt). + ++ [Tutorial on GEMMA for genome-wide association +analysis](https://github.com/rcc-uchicago/genetic-data-analysis-2). 
## Citing GEMMA diff --git a/doc/compile_GEMMA_win64.txt b/doc/compile_GEMMA_win64.txt new file mode 100644 index 0000000..84f16ec --- /dev/null +++ b/doc/compile_GEMMA_win64.txt @@ -0,0 +1,48 @@ +// install R 3.4.3 +https://cran.r-project.org/bin/windows/base/ + +// install Rtools 3.4 +https://cran.r-project.org/bin/windows/Rtools/ + +// Download openblas (v0.2.19-Win64-int32) +https://sourceforge.net/projects/openblas/files/v0.2.19/ + +// Make a place to store the files +mkdir Github +cd Github/ + +// Clone the required dependencies +git clone https://github.com/eigenteam/eigen-git-mirror.git +git clone https://github.com/genetics-statistics/GEMMA.git + +// Download and install gsl2.4, unzip/tar into Github +http://gnu.askapache.com/gsl/ + +// Download and install msys from http://downloads.sourceforge.net/mingw/MSYS-1.0.11.exe +cd c:/msys/1.0 +// Run msys +msys.bat + +// Under msys, Compile GSL-2.4 inside the msys +cd /c/ +cd Github/gsl-2.4 +./configure --prefix=C:/MinGW +make -j 4 +make install + +// Building Gemma on the R tool chain under windows using CMD compile gemma +cd gemma +make -j 2 + +// Get all the DLLs from: +- MinGW DLLs: https://sourceforge.net/projects/openblas/files/v0.2.12/mingw64_dll.zip/download +- DLLs from the compiled gsl-2.4 +- DLLs from openBLAS + +// Required DLLs: ++ libgcc_s_seh-1.dll ++ libgfortran-3.dll ++ libgsl-23.dll ++ libgslcblas-0.dll ++ libopenblas.dll ++ libquadmath-0.dll diff --git a/doc/manual.pdf b/doc/manual.pdf Binary files differindex b760cc1..1b7dc5d 100644 --- a/doc/manual.pdf +++ b/doc/manual.pdf diff --git a/doc/manual.tex b/doc/manual.tex index 1e042e7..8e5efe2 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -1373,6 +1373,10 @@ format. In addition, to fit MQS-LDW, you will need to add "-wcat specifies the LD score file, which can be provided in a gzip compressed format. +A feature of MQS-based variance component estimation is that one only needs to use a subset of samples to estimate certain quantities. 
Using a subset of samples dramatically improves computation speed while maintaining variance component estimation accuracy. To adopt this strategy, one can use ``-sample [num]" to use a fixed number of random samples to perform estimation. + +Instead of using the genotype data from the study, one can also use genotype data from a reference panel. For example, one can use the genotype data from the 1000 genomes project as the reference. However, any population stratification in the reference panel should be dealt with first. For example, the individuals with European ancestry in the 1000 genomes project come from five subpopulations: CEU, FIN, GBR, IBS, and TSI. MQS computes SNP correlations across all SNP pairs as it should be under the LMM assumption. Therefore, any population stratification in the reference panel would increase the overall SNP correlation estimate, leading to downward bias in the final heritability estimate. To address the population stratification in the reference panel, one can include a few dummy variables in the model fitting step as covariates. These covariates represent, for example, the five subpopulations, and are used to effectively center the genotype mean in each subpopulation separately. To do this, one can create a covariate file containing five columns (no header): the first column is all 1s, representing the intercept; the second column is 1 for CEU and 0 for others; the third column is 1 for FIN and 0 for others; ...; while the fifth column is 1 for IBS and 0 for others. Afterwards, one can add "-c [filename]" to include this covariate file in the command line. 
+ \subsubsection{Detailed Information} MQS-LDW uses an iterative procedure to update the variance diff --git a/example/demo.txt b/example/demo.txt index 9452e04..9b22175 100644 --- a/example/demo.txt +++ b/example/demo.txt @@ -98,7 +98,9 @@ chr rs ps n_miss allele1 allele0 af beta_1 beta_2 Vbeta_1_1 Vbeta_1_2 Vbeta_2_2 ## To fit BSLMM in the training set: ## To fit a quantitative trait -../bin/gemma -g mouse_hs1940.geno.txt.gz -p mouse_hs1940.pheno.txt -n 2 -a mouse_hs1940.anno.txt -bslmm -o mouse_hs1940_CD8_bslmm -w 1000 -s 10000 -seed 1 +../bin/gemma -g mouse_hs1940.geno.txt.gz -p mouse_hs1940.pheno.txt -n 2 \ + -a mouse_hs1940.anno.txt -bslmm -o mouse_hs1940_CD8_bslmm \ + -w 1000 -s 10000 -seed 1 # the following three files may be of most importance: # the *.hyp.txt contains a column for pve and pge diff --git a/scripts/gen_version_info.cmd b/scripts/gen_version_info.cmd new file mode 100644 index 0000000..d824687 --- /dev/null +++ b/scripts/gen_version_info.cmd @@ -0,0 +1,16 @@ +@echo off +rem https://stackoverflow.com/questions/3472631/how-do-i-get-the-day-month-and-year-from-a-windows-cmd-exe-script +FOR /F "skip=1 tokens=1-6" %%A IN ('WMIC Path Win32_LocalTime Get Day^,Hour^,Minute^,Month^,Second^,Year /Format:table') DO ( + if "%%B" NEQ "" ( + SET /A FDATE=%%F*10000+%%D*100+%%A + ) +) +set year=%FDATE:~0,4% +set /p version=<VERSION + +echo // version.h generated by GEMMA +rem https://stackoverflow.com/questions/7105433/windows-batch-echo-without-new-line +echo|set /p="#define GEMMA_VERSION "" +echo %version%" +echo #define GEMMA_DATE "%FDATE:~0,8%" +echo #define GEMMA_YEAR "%year%" diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp index e9900e3..6220bb5 100644 --- a/src/bslmmdap.cpp +++ b/src/bslmmdap.cpp @@ -39,7 +39,7 @@ #include "gsl/gsl_vector.h" #include "bslmmdap.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "lm.h" #include "lmm.h" diff --git a/src/gemma.cpp b/src/gemma.cpp index 97e947a..758aa24 100644 --- a/src/gemma.cpp +++ 
b/src/gemma.cpp @@ -49,7 +49,7 @@ extern "C" { #include "bslmm.h" #include "bslmmdap.h" #include "gemma.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "ldr.h" #include "lm.h" diff --git a/src/io.cpp b/src/gemma_io.cpp index 923eb11..818c5e8 100644 --- a/src/io.cpp +++ b/src/gemma_io.cpp @@ -45,7 +45,7 @@ // #include "eigenlib.h" #include "fastblas.h" #include "gzstream.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "mathfunc.h" diff --git a/src/io.h b/src/gemma_io.h index dd1d5c0..dd1d5c0 100644 --- a/src/io.h +++ b/src/gemma_io.h @@ -21,7 +21,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" -#include "io.h" +#include "gemma_io.h" #include "param.h" using namespace std; diff --git a/src/lmm.cpp b/src/lmm.cpp index 5822275..acd9667 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -43,7 +43,7 @@ // #include "eigenlib.h" #include "gzstream.h" -#include "io.h" +#include "gemma_io.h" #include "fastblas.h" #include "lapack.h" #include "lmm.h" @@ -23,7 +23,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" -#include "io.h" +#include "gemma_io.h" #include "param.h" #include <functional> #include <tuple> diff --git a/src/main.cpp b/src/main.cpp index 706ac35..deadc63 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -58,7 +58,11 @@ int main(int argc, char *argv[]) { ifstream check_dir((cPar.path_out).c_str()); if (!check_dir) { - mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH); + #ifdef WINDOWS + mkdir((cPar.path_out).c_str()); + #else + mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH); + #endif } if (!is_quiet_mode()) diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index eee562d..14f8b4a 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -39,7 +39,7 @@ #include "fastblas.h" #include "gzstream.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "lmm.h" #include "mvlmm.h" diff --git a/src/mvlmm.h b/src/mvlmm.h index 4329ad1..b92bd5e 100644 --- a/src/mvlmm.h +++ 
b/src/mvlmm.h @@ -21,7 +21,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" -#include "io.h" +#include "gemma_io.h" #include "param.h" using namespace std; diff --git a/src/param.cpp b/src/param.cpp index bf6c195..edee79d 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -35,7 +35,7 @@ #include "gsl/gsl_vector.h" #include "eigenlib.h" -#include "io.h" +#include "gemma_io.h" #include "mathfunc.h" #include "param.h" @@ -521,7 +521,7 @@ void PARAM::CheckParam(void) { a_mode != 71) { cout << "error! unknown analysis mode: " << a_mode << ". make sure -gk or -eigen or -lmm or -bslmm -predict or " - << "-calccov is sepcified correctly." << endl; + << "-calccov is specified correctly." << endl; error = true; } if (miss_level > 1) { diff --git a/src/prdt.cpp b/src/prdt.cpp index fc0abe8..42912a8 100644 --- a/src/prdt.cpp +++ b/src/prdt.cpp @@ -32,8 +32,7 @@ #include <vector> #include "gzstream.h" -#include "io.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "mathfunc.h" #include "prdt.h" diff --git a/src/varcov.cpp b/src/varcov.cpp index e9c7295..055d3b1 100644 --- a/src/varcov.cpp +++ b/src/varcov.cpp @@ -37,7 +37,7 @@ #include "gsl/gsl_vector.h" #include "gzstream.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "mathfunc.h" #include "param.h" diff --git a/src/varcov.h b/src/varcov.h index 47b4f9d..fa707d5 100644 --- a/src/varcov.h +++ b/src/varcov.h @@ -21,7 +21,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" -#include "io.h" +#include "gemma_io.h" #include "param.h" using namespace std; @@ -45,7 +45,7 @@ #include "eigenlib.h" #include "gzstream.h" -#include "io.h" +#include "gemma_io.h" #include "lapack.h" #include "lmm.h" #include "mathfunc.h" @@ -21,7 +21,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" -#include "io.h" +#include "gemma_io.h" #include "param.h" using namespace std; |