aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile44
-rw-r--r--README.md9
-rw-r--r--doc/compile_GEMMA_win64.txt48
-rw-r--r--doc/manual.pdfbin269308 -> 319480 bytes
-rw-r--r--doc/manual.tex4
-rw-r--r--example/demo.txt4
-rw-r--r--scripts/gen_version_info.cmd16
-rw-r--r--src/bslmmdap.cpp2
-rw-r--r--src/gemma.cpp2
-rw-r--r--src/gemma_io.cpp (renamed from src/io.cpp)2
-rw-r--r--src/gemma_io.h (renamed from src/io.h)0
-rw-r--r--src/lm.h2
-rw-r--r--src/lmm.cpp2
-rw-r--r--src/lmm.h2
-rw-r--r--src/main.cpp6
-rw-r--r--src/mvlmm.cpp2
-rw-r--r--src/mvlmm.h2
-rw-r--r--src/param.cpp4
-rw-r--r--src/prdt.cpp3
-rw-r--r--src/varcov.cpp2
-rw-r--r--src/varcov.h2
-rw-r--r--src/vc.cpp2
-rw-r--r--src/vc.h2
23 files changed, 133 insertions, 29 deletions
diff --git a/Makefile b/Makefile
index f8bacbf..bdb58b2 100644
--- a/Makefile
+++ b/Makefile
@@ -41,7 +41,19 @@
GEMMA_VERSION = $(shell cat ./VERSION)
# Set this variable to either LNX or MAC
-SYS = LNX # LNX|MAC (Linux is the default)
+ifeq ($(OS),Windows_NT)
+ SYS = WIN
+ VGEN = scripts/gen_version_info.cmd
+else
+ UNAME_S := $(shell uname -s)
+ ifeq ($(UNAME_S),Darwin)
+ SYS = MAC
+ else
+ SYS = LNX # default to linux
+ endif
+ VGEN = scripts/gen_version_info.sh
+endif
+
# Leave blank after "=" to disable; put "= 1" to enable
DIST_NAME = gemma-$(GEMMA_VERSION)
DEBUG = 1 # DEBUG mode, set DEBUG=0 for a release
@@ -53,8 +65,20 @@ OPENBLAS_LEGACY = # Using older OpenBlas
FORCE_STATIC = # Static linking of libraries
GCC_FLAGS = -Wall -O3 -std=gnu++11 # extra flags -Wl,--allow-multiple-definition
TRAVIS_CI = # used by TRAVIS for testing
-EIGEN_INCLUDE_PATH = /usr/include/eigen3
-OPENBLAS_INCLUDE_PATH = /usr/local/opt/openblas/include
+
+GSL_INCLUDE_PATH =
+ifeq ($(SYS), WIN)
+ GSL_INCLUDE_PATH = -isystemc:/MinGW/include -LC:/MinGW/lib
+ EIGEN_INCLUDE_PATH = ../eigen-git-mirror
+ OPENBLAS_INCLUDE_PATH = ../OpenBLAS-v0.2.19-Win64-int32/include -L../OpenBLAS-v0.2.19-Win64-int32/lib
+else
+ OPENBLAS_INCLUDE_PATH = /usr/local/opt/openblas/include
+ ifeq ($(SYS), MAC)
+ EIGEN_INCLUDE_PATH = /usr/local/include/eigen3
+ else
+ EIGEN_INCLUDE_PATH = /usr/include/eigen3
+ endif
+endif
# --------------------------------------------------------------------
# Edit below this line with caution
@@ -73,13 +97,13 @@ endif
ifeq ($(CPP), clang++)
# macOS Homebrew settings (as used on Travis-CI)
- GCC_FLAGS=-O3 -std=c++11 -stdlib=libc++ -isystem/$(OPENBLAS_INCLUDE_PATH) -isystem//usr/local/include/eigen3 -Wl,-L/usr/local/opt/openblas/lib
+ GCC_FLAGS=-O3 -std=c++11 -stdlib=libc++ -isystem$(OPENBLAS_INCLUDE_PATH) -isystem$(EIGEN_INCLUDE_PATH) -Wl,-L/usr/local/opt/openblas/lib
endif
ifdef WITH_OPENBLAS
OPENBLAS=1
# WITH_LAPACK = # OPENBLAS usually includes LAPACK
- CPPFLAGS += -DOPENBLAS -isystem/$(OPENBLAS_INCLUDE_PATH)
+ CPPFLAGS += -DOPENBLAS -isystem$(OPENBLAS_INCLUDE_PATH)
ifdef OPENBLAS_LEGACY
# Legacy version (mostly for Travis-CI)
CPPFLAGS += -DOPENBLAS_LEGACY
@@ -87,10 +111,14 @@ ifdef WITH_OPENBLAS
endif
ifdef DEBUG
- CPPFLAGS += -g $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+ CPPFLAGS += -g $(GCC_FLAGS) $(GSL_INCLUDE_PATH) -isystem$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
else
# release mode
- CPPFLAGS += -DNDEBUG $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+ CPPFLAGS += -DNDEBUG $(GCC_FLAGS) $(GSL_INCLUDE_PATH) -isystem$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+endif
+
+ifeq ($(SYS), WIN)
+ CPPFLAGS += -Duint="unsigned int" -D__CRT__NO_INLINE -D__STRING="__STRINGIFY" -DWINDOWS -DWITH_GSLCBLAS=1
endif
ifdef SHOW_COMPILER_WARNINGS
@@ -149,7 +177,7 @@ OBJS = $(SOURCES:.cpp=.o)
all: $(OUTPUT)
./src/version.h:
- ./scripts/gen_version_info.sh > src/version.h
+ $(VGEN) > src/version.h
$(OUTPUT): $(OBJS)
$(CPP) $(CPPFLAGS) $(OBJS) $(LIBS) -o $(OUTPUT)
diff --git a/README.md b/README.md
index 355a3a9..8c97aee 100644
--- a/README.md
+++ b/README.md
@@ -88,11 +88,14 @@ numerical libraries.
gemma.linux.gz` to unpack the file.
-## Quick start
+## Help materials
-1. Work through the demo. *Give more details here.*
++ [The GEMMA manual](doc/manual.pdf).
-2. Read the manual and run `gemma -h`. *Give more details here.*
++ [Detailed example with HS mouse data](example/demo.txt).
+
++ [Tutorial on GEMMA for genome-wide association
+analysis](https://github.com/rcc-uchicago/genetic-data-analysis-2).
## Citing GEMMA
diff --git a/doc/compile_GEMMA_win64.txt b/doc/compile_GEMMA_win64.txt
new file mode 100644
index 0000000..84f16ec
--- /dev/null
+++ b/doc/compile_GEMMA_win64.txt
@@ -0,0 +1,48 @@
+// install R 3.4.3
+https://cran.r-project.org/bin/windows/base/
+
+// install Rtools 3.4
+https://cran.r-project.org/bin/windows/Rtools/
+
+// Download openblas (v0.2.19-Win64-int32)
+https://sourceforge.net/projects/openblas/files/v0.2.19/
+
+// Make a place to store the files
+mkdir Github
+cd Github/
+
+// Clone the required dependencies
+git clone https://github.com/eigenteam/eigen-git-mirror.git
+git clone https://github.com/genetics-statistics/GEMMA.git
+
+// Download and install gsl2.4, unzip/tar into Github
+http://gnu.askapache.com/gsl/
+
+// Download and install msys from http://downloads.sourceforge.net/mingw/MSYS-1.0.11.exe
+cd c:/msys/1.0
+// Run msys
+msys.bat
+
+// Under msys, Compile GSL-2.4 inside the msys
+cd /c/
+cd Github/gsl-2.4
+./configure --prefix=C:/MinGW
+make -j 4
+make install
+
+// Build GEMMA with the Rtools toolchain from a Windows CMD prompt
+cd gemma
+make -j 2
+
+// Get all the DLLs from:
+- MinGW DLLs: https://sourceforge.net/projects/openblas/files/v0.2.12/mingw64_dll.zip/download
+- DLLs from the compiled gsl-2.4
+- DLLs from openBLAS
+
+// Required DLLs:
++ libgcc_s_seh-1.dll
++ libgfortran-3.dll
++ libgsl-23.dll
++ libgslcblas-0.dll
++ libopenblas.dll
++ libquadmath-0.dll
diff --git a/doc/manual.pdf b/doc/manual.pdf
index b760cc1..1b7dc5d 100644
--- a/doc/manual.pdf
+++ b/doc/manual.pdf
Binary files differ
diff --git a/doc/manual.tex b/doc/manual.tex
index 1e042e7..8e5efe2 100644
--- a/doc/manual.tex
+++ b/doc/manual.tex
@@ -1373,6 +1373,10 @@ format. In addition, to fit MQS-LDW, you will need to add "-wcat
specifies the LD score file, which can be provided in a gzip
compressed format.
+A feature of MQS based variance component estimation is that one only needs to use a subset of samples to estimate certain quantities. Using a subset of samples dramatically improves computation speed while maintaining variance component estimation accuracy. To adopt this strategy, one can use ``-sample [num]" to use a fixed number of random samples to perform estimation.
+
+Instead of using the genotype data from the study, one can also use genotype data from a reference panel. For example, one can use the genotype data from the 1000 genomes project as the reference. However, any population stratification in the reference panel should be dealt with first. For example, the individuals with European ancestry in the 1000 genomes project come from five subpopulations: CEU, FIN, GBR, IBS, and TSI. MQS computes SNP correlations across all SNP pairs, as it should under the LMM assumption. Therefore, any population stratification in the reference panel would increase the overall SNP correlation estimate, leading to downward bias in the final heritability estimate. To address the population stratification in the reference panel, one can include a few dummy variables in the model fitting step as covariates. These covariates represent, for example, the five subpopulations, and are used to effectively center the genotype mean in each subpopulation separately. To do this, one can create a covariate file containing five columns (no header): the first column is all 1s representing the intercept; the second column is 1 for CEU and 0 for others; the third column is 1 for FIN and 0 for others; ...; while the fifth column is 1 for IBS and 0 for others. Afterwards, one can add "-c [filename]" to include this covariate file in the command line.
+
\subsubsection{Detailed Information}
MQS-LDW uses an iterative procedure to update the variance
diff --git a/example/demo.txt b/example/demo.txt
index 9452e04..9b22175 100644
--- a/example/demo.txt
+++ b/example/demo.txt
@@ -98,7 +98,9 @@ chr rs ps n_miss allele1 allele0 af beta_1 beta_2 Vbeta_1_1 Vbeta_1_2 Vbeta_2_2
## To fit BSLMM in the training set:
## To fit a quantitative trait
-../bin/gemma -g mouse_hs1940.geno.txt.gz -p mouse_hs1940.pheno.txt -n 2 -a mouse_hs1940.anno.txt -bslmm -o mouse_hs1940_CD8_bslmm -w 1000 -s 10000 -seed 1
+../bin/gemma -g mouse_hs1940.geno.txt.gz -p mouse_hs1940.pheno.txt -n 2 \
+ -a mouse_hs1940.anno.txt -bslmm -o mouse_hs1940_CD8_bslmm \
+ -w 1000 -s 10000 -seed 1
# the following three files may be of most importance:
# the *.hyp.txt contains a column for pve and pge
diff --git a/scripts/gen_version_info.cmd b/scripts/gen_version_info.cmd
new file mode 100644
index 0000000..d824687
--- /dev/null
+++ b/scripts/gen_version_info.cmd
@@ -0,0 +1,16 @@
+@echo off
+rem https://stackoverflow.com/questions/3472631/how-do-i-get-the-day-month-and-year-from-a-windows-cmd-exe-script
+FOR /F "skip=1 tokens=1-6" %%A IN ('WMIC Path Win32_LocalTime Get Day^,Hour^,Minute^,Month^,Second^,Year /Format:table') DO (
+ if "%%B" NEQ "" (
+ SET /A FDATE=%%F*10000+%%D*100+%%A
+ )
+)
+set year=%FDATE:~0,4%
+set /p version=<VERSION
+
+echo // version.h generated by GEMMA
+rem https://stackoverflow.com/questions/7105433/windows-batch-echo-without-new-line
+echo|set /p="#define GEMMA_VERSION ""
+echo %version%"
+echo #define GEMMA_DATE "%FDATE:~0,8%"
+echo #define GEMMA_YEAR "%year%"
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index e9900e3..6220bb5 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -39,7 +39,7 @@
#include "gsl/gsl_vector.h"
#include "bslmmdap.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "lm.h"
#include "lmm.h"
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 97e947a..758aa24 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -49,7 +49,7 @@ extern "C" {
#include "bslmm.h"
#include "bslmmdap.h"
#include "gemma.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "ldr.h"
#include "lm.h"
diff --git a/src/io.cpp b/src/gemma_io.cpp
index 923eb11..818c5e8 100644
--- a/src/io.cpp
+++ b/src/gemma_io.cpp
@@ -45,7 +45,7 @@
// #include "eigenlib.h"
#include "fastblas.h"
#include "gzstream.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "mathfunc.h"
diff --git a/src/io.h b/src/gemma_io.h
index dd1d5c0..dd1d5c0 100644
--- a/src/io.h
+++ b/src/gemma_io.h
diff --git a/src/lm.h b/src/lm.h
index 030e6f9..bcec0ab 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -21,7 +21,7 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
-#include "io.h"
+#include "gemma_io.h"
#include "param.h"
using namespace std;
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 5822275..acd9667 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -43,7 +43,7 @@
// #include "eigenlib.h"
#include "gzstream.h"
-#include "io.h"
+#include "gemma_io.h"
#include "fastblas.h"
#include "lapack.h"
#include "lmm.h"
diff --git a/src/lmm.h b/src/lmm.h
index e9740d5..f603599 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -23,7 +23,7 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
-#include "io.h"
+#include "gemma_io.h"
#include "param.h"
#include <functional>
#include <tuple>
diff --git a/src/main.cpp b/src/main.cpp
index 706ac35..deadc63 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -58,7 +58,11 @@ int main(int argc, char *argv[]) {
ifstream check_dir((cPar.path_out).c_str());
if (!check_dir) {
- mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH);
+ #ifdef WINDOWS
+ mkdir((cPar.path_out).c_str());
+ #else
+ mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH);
+ #endif
}
if (!is_quiet_mode())
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index eee562d..14f8b4a 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -39,7 +39,7 @@
#include "fastblas.h"
#include "gzstream.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "lmm.h"
#include "mvlmm.h"
diff --git a/src/mvlmm.h b/src/mvlmm.h
index 4329ad1..b92bd5e 100644
--- a/src/mvlmm.h
+++ b/src/mvlmm.h
@@ -21,7 +21,7 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
-#include "io.h"
+#include "gemma_io.h"
#include "param.h"
using namespace std;
diff --git a/src/param.cpp b/src/param.cpp
index bf6c195..edee79d 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -35,7 +35,7 @@
#include "gsl/gsl_vector.h"
#include "eigenlib.h"
-#include "io.h"
+#include "gemma_io.h"
#include "mathfunc.h"
#include "param.h"
@@ -521,7 +521,7 @@ void PARAM::CheckParam(void) {
a_mode != 71) {
cout << "error! unknown analysis mode: " << a_mode
<< ". make sure -gk or -eigen or -lmm or -bslmm -predict or "
- << "-calccov is sepcified correctly." << endl;
+ << "-calccov is specified correctly." << endl;
error = true;
}
if (miss_level > 1) {
diff --git a/src/prdt.cpp b/src/prdt.cpp
index fc0abe8..42912a8 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -32,8 +32,7 @@
#include <vector>
#include "gzstream.h"
-#include "io.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "mathfunc.h"
#include "prdt.h"
diff --git a/src/varcov.cpp b/src/varcov.cpp
index e9c7295..055d3b1 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -37,7 +37,7 @@
#include "gsl/gsl_vector.h"
#include "gzstream.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "mathfunc.h"
#include "param.h"
diff --git a/src/varcov.h b/src/varcov.h
index 47b4f9d..fa707d5 100644
--- a/src/varcov.h
+++ b/src/varcov.h
@@ -21,7 +21,7 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
-#include "io.h"
+#include "gemma_io.h"
#include "param.h"
using namespace std;
diff --git a/src/vc.cpp b/src/vc.cpp
index 1a16c07..416a3e4 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -45,7 +45,7 @@
#include "eigenlib.h"
#include "gzstream.h"
-#include "io.h"
+#include "gemma_io.h"
#include "lapack.h"
#include "lmm.h"
#include "mathfunc.h"
diff --git a/src/vc.h b/src/vc.h
index 49397bb..3586433 100644
--- a/src/vc.h
+++ b/src/vc.h
@@ -21,7 +21,7 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
-#include "io.h"
+#include "gemma_io.h"
#include "param.h"
using namespace std;