From b9758364059d52e153a9f1b4fcae3bc3f3e68422 Mon Sep 17 00:00:00 2001
From: Pjotr Prins
Date: Fri, 7 Jul 2017 06:54:26 +0000
Subject: Fix spacing
---
Makefile.linux | 10 +-
Makefile.macosx | 8 +-
README.md | 6 +-
doc/manual.tex | 30 +--
src/bslmm.cpp | 780 +++++++++++++++++++++++++++----------------------------
src/bslmm.h | 48 ++--
src/bslmmdap.cpp | 60 ++---
src/bslmmdap.h | 36 +--
src/eigenlib.cpp | 4 +-
src/eigenlib.h | 2 +-
src/gemma.cpp | 46 ++--
src/gemma.h | 8 +-
src/gzstream.cpp | 6 +-
src/gzstream.h | 16 +-
src/lapack.h | 2 +-
src/ldr.cpp | 18 +-
src/ldr.h | 26 +-
src/lm.cpp | 6 +-
src/lm.h | 2 +-
src/lmm.cpp | 50 ++--
src/lmm.h | 4 +-
src/logistic.h | 150 +++++------
src/main.cpp | 30 +--
src/mathfunc.cpp | 2 +-
src/param.cpp | 24 +-
src/param.h | 6 +-
src/prdt.cpp | 188 +++++++-------
src/prdt.h | 14 +-
src/varcov.cpp | 56 ++--
src/varcov.h | 4 +-
src/vc.cpp | 26 +-
31 files changed, 834 insertions(+), 834 deletions(-)
diff --git a/Makefile.linux b/Makefile.linux
index 0b2c0b9..b51bbfe 100644
--- a/Makefile.linux
+++ b/Makefile.linux
@@ -14,8 +14,8 @@ SYS = LNX
# Disable WITH_LAPACK option can slow computation speed significantly and is not recommended
# Disable WITH_ARPACK option only disable -apprx option in the software
WITH_LAPACK = 1
-FORCE_32BIT =
-FORCE_DYNAMIC =
+FORCE_32BIT =
+FORCE_DYNAMIC =
DIST_NAME = gemma-0.96alpha
# --------------------------------------------------------------------
@@ -45,7 +45,7 @@ OUTPUT = $(BIN_DIR)/gemma
SOURCES = $(SRC_DIR)/main.cpp
-HDR =
+HDR =
# Detailed libary paths, D for dynamic and S for static
LIBS_LNX_D_LAPACK = -llapack
@@ -54,7 +54,7 @@ LIBS_LNX_S_LAPACK = /software/atlas-3.10.3-el7-x86_64/lib/liblapack.a \
/software/atlas-3.10.3-el7-x86_64/lib/libcblas.a \
/software/atlas-3.10.3-el7-x86_64/lib/libf77blas.a \
/software/atlas-3.10.3-el7-x86_64/lib/libatlas.a -lgfortran \
- -Wl,--allow-multiple-definition
+ -Wl,--allow-multiple-definition
SOURCES += $(SRC_DIR)/param.cpp $(SRC_DIR)/gemma.cpp $(SRC_DIR)/io.cpp $(SRC_DIR)/lm.cpp $(SRC_DIR)/lmm.cpp $(SRC_DIR)/vc.cpp $(SRC_DIR)/mvlmm.cpp $(SRC_DIR)/bslmm.cpp $(SRC_DIR)/prdt.cpp $(SRC_DIR)/mathfunc.cpp $(SRC_DIR)/gzstream.cpp $(SRC_DIR)/eigenlib.cpp $(SRC_DIR)/ldr.cpp $(SRC_DIR)/bslmmdap.cpp $(SRC_DIR)/logistic.cpp $(SRC_DIR)/varcov.cpp
HDR += $(SRC_DIR)/param.h $(SRC_DIR)/gemma.h $(SRC_DIR)/io.h $(SRC_DIR)/lm.h $(SRC_DIR)/lmm.h $(SRC_DIR)/vc.h $(SRC_DIR)/mvlmm.h $(SRC_DIR)/bslmm.h $(SRC_DIR)/prdt.h $(SRC_DIR)/mathfunc.h $(SRC_DIR)/gzstream.h $(SRC_DIR)/eigenlib.h
@@ -91,7 +91,7 @@ $(OUTPUT): $(OBJS)
$(OBJS) : $(HDR)
-.cpp.o:
+.cpp.o:
$(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o
.SUFFIXES : .cpp .c .o $(SUFFIXES)
diff --git a/Makefile.macosx b/Makefile.macosx
index 992e442..00467d6 100644
--- a/Makefile.macosx
+++ b/Makefile.macosx
@@ -14,7 +14,7 @@ SYS = MAC
# Disable WITH_LAPACK option can slow computation speed significantly and is not recommended
# Disable WITH_ARPACK option only disable -apprx option in the software
WITH_LAPACK = 1
-FORCE_32BIT =
+FORCE_32BIT =
FORCE_DYNAMIC = 1
DIST_NAME = gemma-0.96
@@ -37,13 +37,13 @@ OUTPUT = $(BIN_DIR)/gemma
SOURCES = $(SRC_DIR)/main.cpp
-HDR =
+HDR =
# Detailed libary paths, D for dynamic and S for static
LIBS_LNX_D_LAPACK = -llapack
LIBS_MAC_D_LAPACK = -framework Accelerate
-LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition
+LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition
# Options
@@ -82,7 +82,7 @@ $(OUTPUT): $(OBJS)
$(OBJS) : $(HDR)
-.cpp.o:
+.cpp.o:
$(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o
.SUFFIXES : .cpp .c .o $(SUFFIXES)
diff --git a/README.md b/README.md
index 19f66a5..8006eb8 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ MQS algorithm to estimate variance components.
If you use GEMMA for published work, please cite our paper:
+ Xiang Zhou and Matthew Stephens (2012). [Genome-wide efficient
-mixed-model analysis for association studies.](http://doi.org/10.1038/ng.2310)
+mixed-model analysis for association studies.](http://doi.org/10.1038/ng.2310)
*Nature Genetics* **44**, 821–824.
If you use the multivariate linear mixed model (mvLMM) in your
@@ -77,7 +77,7 @@ If you use the Bayesian sparse linear mixed model (BSLMM), please cite:
+ Xiang Zhou, Peter Carbonetto and Matthew Stephens (2013). [Polygenic
modeling with bayesian sparse linear mixed
models.](http://doi.org/10.1371/journal.pgen.1003264) *PLoS Genetics*
-**9**, e1003264.
+**9**, e1003264.
And if you use of the variance component estimation using summary
statistics, please cite:
@@ -118,7 +118,7 @@ This is the current structure of the GEMMA source repository:
subfolders; see Wilson et al "Good Enough Practices" paper for example
of this.*
-## Setup
+## Setup
There are two ways to install GEMMA:
diff --git a/doc/manual.tex b/doc/manual.tex
index d47aa22..c0119dc 100644
--- a/doc/manual.tex
+++ b/doc/manual.tex
@@ -100,11 +100,11 @@ available open-source numerical libraries.
Stephens (2014). Efficient multivariate linear mixed model
algorithms for genome-wide association studies. Nature Methods. 11:
407-409.
-
+
\item Bayesian sparse linear mixed models \\ Xiang Zhou, Peter
Carbonetto and Matthew Stephens (2013). Polygenic modeling with
Bayesian sparse linear mixed models. PLoS Genetics. 9(2): e1003264.
-
+
\item Variance component estimation with individual-level or summary
data\\ Xiang Zhou (2016). A unified framework for variance component
estimation with summary statistics in genome-wide association
@@ -212,10 +212,10 @@ effects terms ($\mathbf X\boldsymbol\beta$). These two parameters are
defined as follows:
\begin{align*}
\mbox{PVE}(\bbeta, \bu, \tau) &
- :=\frac{\mbox{V}(\bX\bbeta+\bu)}{\mbox{V}(\bX\bbeta+\bu)+\tau^{-1}}, \\
+ :=\frac{\mbox{V}(\bX\bbeta+\bu)}{\mbox{V}(\bX\bbeta+\bu)+\tau^{-1}}, \\
\mbox{PGE}(\bbeta, \bu)
& :=\frac{\mbox{V}(\bX\bbeta)}{\mbox{V}(\bX\bbeta+\bu)}, \\
-\intertext{where}
+\intertext{where}
\mbox{V}({\bf x}) &:=\frac{1}{n}\sum_{i=1}^n (x_i-\overline {x})^2.
\end{align*}
@@ -429,7 +429,7 @@ BIMBAM mean genotype file from IMPUTE genotype files
(\url{http://www.stats.ox.ac.uk/~marchini/software/gwas/file_format.html})\cite{Howie:2009}:
%
\begin{verbatim}
-cat [impute filename] | awk -v s=[number of samples/individuals]
+cat [impute filename] | awk -v s=[number of samples/individuals]
'{ printf $2 "," $4 "," $5; for(i=1; i<=s; i++) printf "," $(i*3+3)*2+$(i*3+4); printf "\n" }'
> [bimbam filename]
\end{verbatim}
@@ -815,7 +815,7 @@ chr rs ps n_mis n_obs allele1 allele0 af beta se
\end{verbatim}
%
-The 11 columns are: chromosome numbers, snp ids, base pair positions on the chromosome, number of missing individuals for a given snp, number of non-missing individuals for a given snp, minor allele, major allele, allele frequency, beta estimates, standard errors for beta, and $p$ values from the Wald test.
+The 11 columns are: chromosome numbers, snp ids, base pair positions on the chromosome, number of missing individuals for a given snp, number of non-missing individuals for a given snp, minor allele, major allele, allele frequency, beta estimates, standard errors for beta, and $p$ values from the Wald test.
\subsection{Estimate Relatedness Matrix from Genotypes}
@@ -1009,7 +1009,7 @@ ped format or the BIMBAM format are:
\begin{verbatim}
./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] [num3] -o [prefix]
-./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num]
+./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num]
-n [num1] [num2] [num3] -o [prefix]
\end{verbatim}
@@ -1041,7 +1041,7 @@ missing, one can impute these missing values before association tests:
\begin{verbatim}
./gemma -bfile [prefix] -k [filename] -predict -n [num1] [num2] [num3] -o [prefix]
-./gemma -g [filename] -p [filename] -a [filename] -k [filename] -predict
+./gemma -g [filename] -p [filename] -a [filename] -k [filename] -predict
-n [num1] [num2] [num3] -o [prefix]
\end{verbatim}
@@ -1154,10 +1154,10 @@ below:
%
\begin{verbatim}
h pve rho pge pi n_gamma
-4.777635e-01 5.829042e-01 4.181280e-01 4.327976e-01 2.106763e-03 25
-5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
-5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
-6.361674e-01 6.461678e-01 3.130355e-01 3.659850e-01 2.188401e-03 25
+4.777635e-01 5.829042e-01 4.181280e-01 4.327976e-01 2.106763e-03 25
+5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
+5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
+6.361674e-01 6.461678e-01 3.130355e-01 3.659850e-01 2.188401e-03 25
5.479237e-01 6.228036e-01 3.231856e-01 4.326231e-01 2.164183e-03 27
\end{verbatim}
%
@@ -1196,9 +1196,9 @@ The basic usages for association analysis with either the PLINK binary
ped format or the BIMBAM format are:
\begin{verbatim}
-./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename]
+./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename]
-predict [num] -o [prefix]
-./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename]
+./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename]
-k [filename] -predict [num] -o [prefix]
\end{verbatim}
@@ -1544,7 +1544,7 @@ source code for detailed examples.
\item \textcolor{red}{-ci [num]} \quad specify fitting algorithm to compute the standard errors. (default 1; valid value 1-2; 1: MQS-HEW; 2: MQS-LDW.)
\end{itemize}
%
-
+
\clearpage
\bibliographystyle{plain}
diff --git a/src/bslmm.cpp b/src/bslmm.cpp
index 563b743..d579802 100644
--- a/src/bslmm.cpp
+++ b/src/bslmm.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
@@ -24,7 +24,7 @@
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -50,32 +50,32 @@ using namespace std;
void BSLMM::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
- l_min=cPar.h_min;
- l_max=cPar.h_max;
- n_region=cPar.n_region;
+
+ l_min=cPar.h_min;
+ l_max=cPar.h_max;
+ n_region=cPar.n_region;
pve_null=cPar.pve_null;
pheno_mean=cPar.pheno_mean;
-
+
time_UtZ=0.0;
time_Omega=0.0;
n_accept=0;
-
- h_min=cPar.h_min;
- h_max=cPar.h_max;
+
+ h_min=cPar.h_min;
+ h_max=cPar.h_max;
h_scale=cPar.h_scale;
- rho_min=cPar.rho_min;
- rho_max=cPar.rho_max;
+ rho_min=cPar.rho_min;
+ rho_max=cPar.rho_max;
rho_scale=cPar.rho_scale;
- logp_min=cPar.logp_min;
- logp_max=cPar.logp_max;
+ logp_min=cPar.logp_min;
+ logp_max=cPar.logp_max;
logp_scale=cPar.logp_scale;
-
+
s_min=cPar.s_min;
s_max=cPar.s_max;
w_step=cPar.w_step;
@@ -86,17 +86,17 @@ void BSLMM::CopyFromParam (PARAM &cPar) {
geo_mean=cPar.geo_mean;
randseed=cPar.randseed;
trace_G=cPar.trace_G;
-
+
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
@@ -108,7 +108,7 @@ void BSLMM::CopyToParam (PARAM &cPar) {
cPar.n_accept=n_accept;
cPar.pheno_mean=pheno_mean;
cPar.randseed=randseed;
-
+
return;
}
@@ -119,28 +119,28 @@ void BSLMM::WriteBV (const gsl_vector *bv) {
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
- cout<<"error writing file: "< > &beta_g,
+void BSLMM::WriteParam (vector > &beta_g,
const gsl_vector *alpha, const size_t w) {
string file_str;
file_str=path_out+"/"+file_out;
@@ -148,20 +148,20 @@ void BSLMM::WriteParam (vector > &beta_g,
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
- cout<<"error writing file: "< > &beta_g,
outfile<<0.0<<"\t"<<0.0< &rank) {
size_t pos;
for (size_t i=0; isize2, UtXgamma->size2);
gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2);
gsl_vector *OiXty=gsl_vector_alloc (UtXgamma->size2);
gsl_matrix_set_identity (Omega);
- gsl_matrix_scale (Omega, 1.0/sigma_a2);
+ gsl_matrix_scale (Omega, 1.0/sigma_a2);
lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma,
1.0, Omega);
gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
CholeskySolve(Omega, Xty, OiXty);
-
+
gsl_blas_ddot (Xty, OiXty, &pve);
gsl_blas_ddot (Uty, Uty, &var_y);
-
+
pve/=var_y;
-
+
gsl_matrix_free (Omega);
gsl_vector_free (Xty);
gsl_vector_free (OiXty);
@@ -342,28 +342,28 @@ double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
return pve;
}
-void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
- vector &rank, class HYPBSLMM &cHyp,
+void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
+ vector &rank, class HYPBSLMM &cHyp,
vector > &pos_loglr) {
double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1);
-
+
cHyp.n_gamma=0;
for (size_t i=0; iq_genome) {cHyp.n_gamma++;}
}
if (cHyp.n_gamma<10) {cHyp.n_gamma=10;}
-
+
if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;}
- if (cHyp.n_gamma1.0) {cHyp.rho=1.0;}
-
+
if (cHyp.hh_max) {cHyp.h=h_max;}
if (cHyp.rhorho_max) {cHyp.rho=rho_max;}
if (cHyp.logplogp_max) {cHyp.logp=logp_max;}
-
+
cout<<"initial value of h = "<size);
+
+ gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size);
gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
-
+
double logpost=0.0;
double d, ds, uy, Hi_yy=0, logdet_H=0.0;
for (size_t i=0; isize1,
- UtXgamma->size2);
+
+ gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
+ UtXgamma->size2);
gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1);
+ gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1);
gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
-
+
gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
-
+
logdet_H=0.0; P_yy=0.0;
for (size_t i=0; isize; i++)
{
- d=gsl_ran_gaussian(gsl_r, 1);
- gsl_vector_set(beta, i, d);
+ d=gsl_ran_gaussian(gsl_r, 1);
+ gsl_vector_set(beta, i, d);
}
- gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
-
+ gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
+
// This computes inv(L^T(Omega)) %*% beta.
gsl_vector_scale(beta, sqrt(sigma_a2/tau));
- gsl_vector_add(beta, beta_hat);
+ gsl_vector_add(beta, beta_hat);
gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
-
+
// Sample alpha.
gsl_vector_memcpy (alpha_prime, Uty);
gsl_vector_sub (alpha_prime, UtXb);
gsl_vector_mul (alpha_prime, weight_Hi);
gsl_vector_scale (alpha_prime, sigma_b2);
-
+
// Sample u.
gsl_vector_memcpy (Utu, alpha_prime);
gsl_vector_mul (Utu, K_eval);
-
+
if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
- gsl_vector_add (Utu, Utu_rand);
-
+ gsl_vector_add (Utu, Utu_rand);
+
// For quantitative traits, calculate pve and pge.
if (a_mode==11) {
gsl_blas_ddot (UtXb, UtXb, &d);
cHyp.pge=d/(double)ni_test;
-
+
gsl_blas_ddot (Utu, Utu, &d);
cHyp.pve=cHyp.pge+d/(double)ni_test;
-
+
if (cHyp.pve==0) {cHyp.pge=0.0;}
else {cHyp.pge/=cHyp.pve;}
- cHyp.pve/=cHyp.pve+1.0/tau;
- }
+ cHyp.pve/=cHyp.pve+1.0/tau;
+ }
gsl_matrix_free (UtXgamma_eval);
gsl_matrix_free (Omega);
gsl_vector_free (XtHiy);
gsl_vector_free (beta_hat);
- gsl_vector_free (Utu_rand);
+ gsl_vector_free (Utu_rand);
gsl_vector_free (weight_Hi);
-
+
logpost=-0.5*logdet_H-0.5*logdet_O;
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
else {logpost-=0.5*P_yy;}
logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
-
+
return logpost;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
+void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
gsl_vector *z_hat, class HYPBSLMM &cHyp) {
double d;
-
+
gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=d/(double)ni_test;
-
+ cHyp.pve=d/(double)ni_test;
+
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
-
+
cHyp.pve/=cHyp.pve+1.0;
- cHyp.pge=0.0;
-
+ cHyp.pge=0.0;
+
return;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
- const gsl_vector *Utu, gsl_vector *z_hat,
+void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
+ const gsl_vector *Utu, gsl_vector *z_hat,
class HYPBSLMM &cHyp) {
double d;
gsl_vector *UtXbU=gsl_vector_alloc (Utu->size);
-
+
gsl_blas_ddot (UtXb, UtXb, &d);
cHyp.pge=d/(double)ni_test;
-
+
gsl_blas_ddot (Utu, Utu, &d);
cHyp.pve=cHyp.pge+d/(double)ni_test;
-
+
gsl_vector_memcpy (UtXbU, Utu);
gsl_vector_add (UtXbU, UtXb);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
-
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
+
if (cHyp.pve==0) {cHyp.pge=0.0;}
else {cHyp.pge/=cHyp.pve;}
-
+
cHyp.pve/=cHyp.pve+1.0;
-
+
gsl_vector_free(UtXbU);
return;
}
-void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
- gsl_vector *z) {
+void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
+ gsl_vector *z) {
double d1, d2, z_rand=0.0;
for (size_t i=0; isize; ++i) {
d1=gsl_vector_get (y, i);
@@ -634,7 +634,7 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
if (d1<=0.0) {
// Control, right truncated.
- do {
+ do {
z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
} while (z_rand>0.0);
}
@@ -643,25 +643,25 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
} while (z_rand<0.0);
}
-
+
gsl_vector_set (z, i, z_rand);
}
return;
}
-double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
+double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
-
+
double h=cHyp_old.h, rho=cHyp_old.rho;
-
+
double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale;
-
+
for (size_t i=0; ih_max) {h=2*h_max-h;}
-
+
rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho;
if (rhorho_max) {rho=2*rho_max-rho;}
@@ -671,13 +671,13 @@ double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
return 0.0;
}
-double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
+double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp;
double log_ratio=0.0;
-
+
double d_logp=min(0.1, (logp_max-logp_min)*logp_scale);
-
+
for (size_t i=0; i &rank_old,
- vector &rank_new,
- const double *p_gamma,
- const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new,
+double BSLMM::ProposeGamma (const vector &rank_old,
+ vector &rank_new,
+ const double *p_gamma,
+ const class HYPBSLMM &cHyp_old,
+ class HYPBSLMM &cHyp_new,
const size_t &repeat) {
map mapRank2in;
size_t r;
double unif, logp=0.0;
int flag_gamma;
size_t r_add, r_remove, col_id;
-
+
rank_new.clear();
if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"< &rank_old,
mapRank2in[r]=1;
}
}
- cHyp_new.n_gamma=cHyp_old.n_gamma;
-
+ cHyp_new.n_gamma=cHyp_old.n_gamma;
+
for (size_t i=0; i=0.40 && unif < 0.80 &&
+ else if (unif>=0.40 && unif < 0.80 &&
cHyp_new.n_gamma>s_min) {
flag_gamma=2;
}
- else if (unif>=0.80 && cHyp_new.n_gamma>0 &&
+ else if (unif>=0.80 && cHyp_new.n_gamma>0 &&
cHyp_new.n_gamma &rank_old,
// Delete a SNP.
col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
r_remove=rank_new[col_id];
-
+
double prob_total=1.0;
for (size_t i=0; i &rank_old,
// Be careful with the proposal.
do {
r_add=gsl_ran_discrete (gsl_r, gsl_t);
- } while (mapRank2in.count(r_add)!=0);
-
+ } while (mapRank2in.count(r_add)!=0);
+
double prob_total=1.0;
for (size_t i=0; i &rank_old,
}
else {logp+=0;} // Do not change.
}
-
+
stable_sort (rank_new.begin(), rank_new.end(), comp_vec);
mapRank2in.clear();
@@ -806,54 +806,54 @@ double BSLMM::ProposeGamma (const vector &rank_old,
}
bool comp_lr (pair a, pair b) {
- return (a.second > b.second);
+ return (a.second > b.second);
}
// If a_mode==13 then Uty==y.
-void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
+void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y) {
- clock_t time_start;
+ clock_t time_start;
class HYPBSLMM cHyp_old, cHyp_new;
-
+
gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
- gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
- gsl_vector *alpha_prime=gsl_vector_alloc (ni_test);
+ gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
+
+ gsl_vector *alpha_prime=gsl_vector_alloc (ni_test);
gsl_vector *alpha_new=gsl_vector_alloc (ni_test);
- gsl_vector *alpha_old=gsl_vector_alloc (ni_test);
+ gsl_vector *alpha_old=gsl_vector_alloc (ni_test);
gsl_vector *Utu=gsl_vector_alloc (ni_test);
gsl_vector *Utu_new=gsl_vector_alloc (ni_test);
gsl_vector *Utu_old=gsl_vector_alloc (ni_test);
-
+
gsl_vector *UtXb_new=gsl_vector_alloc (ni_test);
gsl_vector *UtXb_old=gsl_vector_alloc (ni_test);
-
+
gsl_vector *z_hat=gsl_vector_alloc (ni_test);
gsl_vector *z=gsl_vector_alloc (ni_test);
- gsl_vector *Utz=gsl_vector_alloc (ni_test);
+ gsl_vector *Utz=gsl_vector_alloc (ni_test);
+
+ gsl_vector_memcpy (Utz, Uty);
- gsl_vector_memcpy (Utz, Uty);
-
double logPost_new, logPost_old;
double logMHratio;
- double mean_z=0.0;
-
+ double mean_z=0.0;
+
gsl_matrix_set_zero (Result_gamma);
gsl_vector_set_zero (Utu);
gsl_vector_set_zero (alpha_prime);
if (a_mode==13) {
pheno_mean=0.0;
}
-
+
vector > beta_g;
for (size_t i=0; i rank_new, rank_old;
- vector beta_new, beta_old;
+ vector beta_new, beta_old;
vector > pos_loglr;
@@ -865,59 +865,59 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
for (size_t i=0; itm_hour%24*3600+
ptm->tm_min*60+ptm->tm_sec);
}
- gsl_r = gsl_rng_alloc(gslType);
+ gsl_r = gsl_rng_alloc(gslType);
gsl_rng_set(gsl_r, randseed);
-
- double *p_gamma = new double[ns_test];
+
+ double *p_gamma = new double[ns_test];
CalcPgamma (p_gamma);
-
+
gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
+
// Initial parameters.
InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr);
-
+
cHyp_initial=cHyp_old;
-
+
if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
- logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old,
+ logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old,
cHyp_old);
beta_old.clear();
for (size_t i=0; isize; ++i) {
beta_old.push_back(gsl_vector_get(beta, i));
- }
+ }
gsl_matrix_free (UtXgamma);
gsl_vector_free (beta);
- }
-
+ }
+
// Calculate centered z_hat, and pve.
if (a_mode==13) {
time_start=clock();
@@ -929,28 +929,28 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
}
time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
}
-
+
// Start MCMC.
int accept;
size_t total_step=w_step+s_step;
size_t w=0, w_col, pos;
size_t repeat=0;
-
+
for (size_t t=0; tsize; ++i) {
beta_old.push_back(gsl_vector_get(beta, i));
@@ -980,7 +980,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
gsl_vector_free (beta);
}
}
-
+
// M-H steps.
for (size_t i=0; i0 ||
log(gsl_rng_uniform(gsl_r))size2,
UtX, bv, 0.0, beta);
@@ -1181,18 +1181,18 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
WriteParam (beta);
WriteBV(bv);
-
+
gsl_vector_free (H_eval);
gsl_vector_free (beta);
gsl_vector_free (bv);
-
+
return;
}
// Below fits MCMC for rho=1.
void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y,
const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
- time_t time_start=clock();
+ time_t time_start=clock();
gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1,
s_size);
gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
@@ -1271,7 +1271,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
for (size_t i=0; isize1, rank_add.size() );
gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2);
@@ -1302,7 +1302,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
// Get t(X_add)X_add and t(X_add)X_temp.
clock_t time_start=clock();
-
+
// Somehow the lapack_dgemm does not work here.
gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add,
0.0, XtX_aa);
@@ -1325,15 +1325,15 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
i_flag=0;
}
- gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
+ gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
if (i_flag==1) {
gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add);
gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
} else {
gsl_vector_const_view Xcopy_col=
- gsl_matrix_const_column(X_old, i_old);
+ gsl_matrix_const_column(X_old, i_old);
gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
- }
+ }
if (i_flag==1) {
d=gsl_vector_get (Xty_add, i_add);
@@ -1385,34 +1385,34 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
rank_union.clear();
mapRank2in_remove.clear();
mapRank2in_add.clear();
-
+
return;
}
-double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) {
+double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) {
double logpost=0.0;
-
+
// For quantitative traits, calculate pve and pge.
// Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
if (a_mode==11) {
cHyp.pve=0.0;
- cHyp.pge=1.0;
+ cHyp.pge=1.0;
}
// Calculate likelihood.
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
else {logpost-=0.5*yty;}
-
+
logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
-
+
return logpost;
}
double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
const gsl_vector *Xty, const double yty,
const size_t s_size, gsl_vector *Xb,
- gsl_vector *beta, class HYPBSLMM &cHyp) {
+ gsl_vector *beta, class HYPBSLMM &cHyp) {
double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
double logpost=0.0;
double d, P_yy=yty, logdet_O=0.0;
@@ -1423,10 +1423,10 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
gsl_vector_const_view Xty_sub=
gsl_vector_const_subvector (Xty, 0, s_size);
-
+
gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
- gsl_vector *beta_hat=gsl_vector_alloc (s_size);
+ gsl_vector *beta_hat=gsl_vector_alloc (s_size);
gsl_vector *Xty_temp=gsl_vector_alloc (s_size);
gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);
@@ -1436,9 +1436,9 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
gsl_matrix_scale (Omega, sigma_a2);
gsl_matrix_set_identity (M_temp);
gsl_matrix_add (Omega, M_temp);
-
+
// Calculate beta_hat.
- logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
+ logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
gsl_vector_scale (beta_hat, sigma_a2);
gsl_blas_ddot (Xty_temp, beta_hat, &d);
@@ -1453,27 +1453,27 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
// Sample beta.
for (size_t i=0; i > beta_g;
for (size_t i=0; i rank_new, rank_old;
vector > pos_loglr;
-
+
time_start=clock();
MatrixCalcLmLR (X, z, pos_loglr);
time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
@@ -1570,44 +1570,44 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
for (size_t i=0; itm_hour%24*3600+
ptm->tm_min*60+ptm->tm_sec);
}
- gsl_r = gsl_rng_alloc(gslType);
+ gsl_r = gsl_rng_alloc(gslType);
gsl_rng_set(gsl_r, randseed);
-
- double *p_gamma = new double[ns_test];
+
+ double *p_gamma = new double[ns_test];
CalcPgamma (p_gamma);
-
+
gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
+
// Initial parameters.
InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr);
-
+
cHyp_initial=cHyp_old;
- if (cHyp_old.n_gamma==0) {
+ if (cHyp_old.n_gamma==0) {
logPost_old=CalcPosterior (ztz, cHyp_old);
}
- else {
- SetXgamma (Xgamma_old, X, rank_old);
+ else {
+ SetXgamma (Xgamma_old, X, rank_old);
CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
rank_old.size(), Xb_old, beta_old,
cHyp_old);
- }
+ }
// Calculate centered z_hat, and pve.
if (a_mode==13) {
@@ -1618,28 +1618,28 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
}
}
-
+
// Start MCMC.
int accept;
size_t total_step=w_step+s_step;
size_t w=0, w_col, pos;
size_t repeat=0;
-
+
for (size_t t=0; t0 ||
log(gsl_rng_uniform(gsl_r)).
*/
-#ifndef __BSLMM_H__
+#ifndef __BSLMM_H__
#define __BSLMM_H__
#include
@@ -30,23 +30,23 @@ using namespace std;
class BSLMM {
-public:
+public:
// IO-related parameters.
- int a_mode;
+ int a_mode;
size_t d_pace;
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
// LMM-related parameters.
double l_min;
double l_max;
size_t n_region;
double pve_null;
double pheno_mean;
-
+
// BSLMM MCMC-related parameters
double h_min, h_max, h_scale; // Priors for h.
double rho_min, rho_max, rho_scale; // Priors for rho.
@@ -61,8 +61,8 @@ public:
size_t n_mh; // Number of MH steps per iter.
double geo_mean; // Mean of geometric dist.
long int randseed;
- double trace_G;
-
+ double trace_G;
+
HYPBSLMM cHyp_initial;
// Summary statistics.
@@ -74,32 +74,32 @@ public:
// Time spent on constructing the proposal distribution for
// gamma (i.e. lmm or lm analysis).
- double time_Proposal;
+ double time_Proposal;
// Indicator for individuals (phenotypes): 0 missing, 1
// available for analysis.
- vector indicator_idv;
+ vector indicator_idv;
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
- vector indicator_snp;
+ vector indicator_snp;
// Record SNP information.
- vector snpInfo;
-
+ vector snpInfo;
+
// Not included in PARAM.
- gsl_rng *gsl_r;
- gsl_ran_discrete_t *gsl_t;
- map mapRank2pos;
-
+ gsl_rng *gsl_r;
+ gsl_ran_discrete_t *gsl_t;
+ map mapRank2pos;
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
-
+
void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
const gsl_vector *Uty, const gsl_vector *eval,
const double lambda);
-
+
void MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y);
@@ -111,10 +111,10 @@ public:
void WriteParam (const gsl_vector *alpha);
void WriteResult (const int flag, const gsl_matrix *Result_hyp,
const gsl_matrix *Result_gamma, const size_t w_col);
-
+
// Subfunctions inside MCMC.
void CalcPgamma (double *p_gammar);
-
+
double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
const double sigma_a2);
void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index ebbff70..e1a53a6 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -92,13 +92,13 @@ void BSLMMDAP::CopyToParam (PARAM &cPar) {
// Read hyp file.
-void ReadFile_hyb (const string &file_hyp, vector &vec_sa2,
+void ReadFile_hyb (const string &file_hyp, vector &vec_sa2,
vector &vec_sb2, vector &vec_wab) {
vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
igzstream infile (file_hyp.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open hyp file: "< &vec_sa2,
}
// Read bf file.
-void ReadFile_bf (const string &file_bf, vector &vec_rs,
+void ReadFile_bf (const string &file_bf, vector &vec_rs,
vector > > &BF) {
BF.clear(); vec_rs.clear();
@@ -196,12 +196,12 @@ void ReadFile_bf (const string &file_bf, vector &vec_rs,
// Read category files.
// Read both continuous and discrete category file, record mapRS2catc.
-void ReadFile_cat (const string &file_cat, const vector &vec_rs,
+void ReadFile_cat (const string &file_cat, const vector &vec_rs,
gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
size_t &kc, size_t &kd) {
igzstream infile (file_cat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open category file: "< &vec_rs,
- const gsl_matrix *Hyper, const gsl_vector *pip,
+void BSLMMDAP::WriteResult (const vector &vec_rs,
+ const gsl_matrix *Hyper, const gsl_vector *pip,
const gsl_vector *coef) {
string file_gamma, file_hyp, file_coef;
file_gamma=path_out+"/"+file_out;
@@ -388,15 +388,15 @@ void BSLMMDAP::WriteResult (const vector &vec_rs,
outfile_coef.open (file_coef.c_str(), ofstream::out);
if (!outfile_gamma) {
- cout<<"error writing file: "< &vec_rs,
}
-double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
- const gsl_vector *K_eval,
+double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
+ const gsl_vector *K_eval,
const double sigma_b2, const double tau) {
gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
@@ -457,16 +457,16 @@ double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
return logm;
}
-double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty,
- const gsl_vector *K_eval,
- const double sigma_a2,
+double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma,
+ const gsl_vector *Uty,
+ const gsl_vector *K_eval,
+ const double sigma_a2,
const double sigma_b2, const double tau) {
clock_t time_start;
double logm=0.0;
double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
- gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
+ gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
UtXgamma->size2);
gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
@@ -526,8 +526,8 @@ double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) {
}
// Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
+void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y) {
clock_t time_start;
@@ -601,9 +601,9 @@ void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
return;
}
-void single_ct_regression(const gsl_matrix_int *Xd,
+void single_ct_regression(const gsl_matrix_int *Xd,
const gsl_vector_int *dlevel,
- const gsl_vector *pip_vec,
+ const gsl_vector *pip_vec,
gsl_vector *coef, gsl_vector *prior_vec) {
map sum_pip;
@@ -635,13 +635,13 @@ void single_ct_regression(const gsl_matrix_int *Xd,
}
// Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd,
- const vector &vec_rs,
- const vector &vec_sa2,
- const vector &vec_sb2,
- const vector &wab,
- const vector > > &BF,
- gsl_matrix *Ac, gsl_matrix_int *Ad,
+void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd,
+ const vector &vec_rs,
+ const vector &vec_sa2,
+ const vector &vec_sb2,
+ const vector &wab,
+ const vector > > &BF,
+ gsl_matrix *Ac, gsl_matrix_int *Ad,
gsl_vector_int *dlevel) {
clock_t time_start;
diff --git a/src/bslmmdap.h b/src/bslmmdap.h
index 8445669..db5774b 100644
--- a/src/bslmmdap.h
+++ b/src/bslmmdap.h
@@ -78,35 +78,35 @@ public:
void CopyToParam (PARAM &cPar);
void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF);
- void WriteResult (const vector &vec_rs,
- const gsl_matrix *Hyper, const gsl_vector *pip,
+ void WriteResult (const vector &vec_rs,
+ const gsl_matrix *Hyper, const gsl_vector *pip,
const gsl_vector *coef);
- double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval,
+ double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval,
const double sigma_b2, const double tau);
- double CalcMarginal (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const double sigma_a2, const double sigma_b2,
+ double CalcMarginal (const gsl_matrix *UtXgamma,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double sigma_a2, const double sigma_b2,
const double tau);
double CalcPrior (class HYPBSLMM &cHyp);
- void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
+ void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y);
- void DAP_EstimateHyper (const size_t kc, const size_t kd,
- const vector &vec_rs,
- const vector &vec_sa2,
- const vector &vec_sb2,
- const vector &wab,
- const vector > > &BF,
- gsl_matrix *Ac, gsl_matrix_int *Ad,
+ void DAP_EstimateHyper (const size_t kc, const size_t kd,
+ const vector &vec_rs,
+ const vector &vec_sa2,
+ const vector &vec_sb2,
+ const vector &wab,
+ const vector > > &BF,
+ gsl_matrix *Ac, gsl_matrix_int *Ad,
gsl_vector_int *dlevel);
};
-void ReadFile_hyb (const string &file_hyp, vector &vec_sa2,
+void ReadFile_hyb (const string &file_hyp, vector &vec_sa2,
vector &vec_sb2, vector &vec_wab);
-void ReadFile_bf (const string &file_bf, vector &vec_rs,
+void ReadFile_bf (const string &file_bf, vector &vec_rs,
vector > > &BF);
-void ReadFile_cat (const string &file_cat, const vector &vec_rs,
+void ReadFile_cat (const string &file_cat, const vector &vec_rs,
gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
size_t &kc, size_t &kd);
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index 7ad250f..733dae1 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -28,9 +28,9 @@ using namespace std;
using namespace Eigen;
// On two different clusters, compare eigen vs lapack/gsl:
-//
+//
// dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl
-// dgemv, 20x or 4x faster than gsl,
+// dgemv, 20x or 4x faster than gsl,
// eigen, 1x or 0.3x slower than lapack
// invert, 20x or 10x faster than lapack
//
diff --git a/src/eigenlib.h b/src/eigenlib.h
index 8cb8880..3659dc1 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -16,7 +16,7 @@
along with this program. If not, see .
*/
-#ifndef __EIGENLIB_H__
+#ifndef __EIGENLIB_H__
#define __EIGENLIB_H__
#include
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 11b33c1..fbed988 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -31,7 +31,7 @@
#include "gsl/gsl_eigen.h"
#include "gsl/gsl_cdf.h"
-#include "lapack.h"
+#include "lapack.h"
#include "io.h"
#include "gemma.h"
#include "vc.h"
@@ -211,7 +211,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<size1, cPar.n_cvt);
-
+
//set covariates matrix W and phenotype matrix Y
//an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
@@ -1545,7 +1545,7 @@ void GEMMA::BatchRun (PARAM &cPar) {
cVarcov.CopyToParam(cPar);
}
-
+
// LM.
if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
@@ -2581,7 +2581,7 @@ void GEMMA::BatchRun (PARAM &cPar) {
} else {
kc=0; kd=0;
}
-
+
cout<<"## number of blocks = "<( gptr());
+ return * reinterpret_cast( gptr());
}
int gzstreambuf::flush_buffer() {
diff --git a/src/gzstream.h b/src/gzstream.h
index 4e86ad9..241ff76 100644
--- a/src/gzstream.h
+++ b/src/gzstream.h
@@ -21,8 +21,8 @@
// Revision : $Revision: 1.5 $
// Revision_date : $Date: 2002/04/26 23:30:15 $
// Author(s) : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
// Standard C++ Library".
// ============================================================================
@@ -58,14 +58,14 @@ public:
setp( buffer, buffer + (bufferSize-1));
setg( buffer + 4, // beginning of putback area
buffer + 4, // read position
- buffer + 4); // end position
+ buffer + 4); // end position
// ASSERT: both input & output capabilities will not be used together
}
int is_open() { return opened; }
gzstreambuf* open( const char* name, int open_mode);
gzstreambuf* close();
~gzstreambuf() { close(); }
-
+
virtual int overflow( int c = EOF);
virtual int underflow();
virtual int sync();
@@ -85,15 +85,15 @@ public:
// ----------------------------------------------------------------------------
// User classes. Use igzstream and ogzstream analogously to ifstream and
-// ofstream respectively. They read and write files based on the gz*
+// ofstream respectively. They read and write files based on the gz*
// function interface of the zlib. Files are compatible with gzip compression.
// ----------------------------------------------------------------------------
class igzstream : public gzstreambase, public std::istream {
public:
- igzstream() : std::istream( &buf) {}
+ igzstream() : std::istream( &buf) {}
igzstream( const char* name, int open_mode = std::ios::in)
- : gzstreambase( name, open_mode), std::istream( &buf) {}
+ : gzstreambase( name, open_mode), std::istream( &buf) {}
gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
void open( const char* name, int open_mode = std::ios::in) {
gzstreambase::open( name, open_mode);
@@ -104,7 +104,7 @@ class ogzstream : public gzstreambase, public std::ostream {
public:
ogzstream() : std::ostream( &buf) {}
ogzstream( const char* name, int mode = std::ios::out)
- : gzstreambase( name, mode), std::ostream( &buf) {}
+ : gzstreambase( name, mode), std::ostream( &buf) {}
gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
void open( const char* name, int open_mode = std::ios::out) {
gzstreambase::open( name, open_mode);
diff --git a/src/lapack.h b/src/lapack.h
index 88fc509..5e1db35 100644
--- a/src/lapack.h
+++ b/src/lapack.h
@@ -16,7 +16,7 @@
along with this program. If not, see .
*/
-#ifndef __LAPACK_H__
+#ifndef __LAPACK_H__
#define __LAPACK_H__
#include
diff --git a/src/ldr.cpp b/src/ldr.cpp
index a1e5791..f0a1b37 100644
--- a/src/ldr.cpp
+++ b/src/ldr.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
@@ -24,7 +24,7 @@
#include
#include
#include
-#include
+#include
#include
#include
#include
@@ -51,7 +51,7 @@ using namespace Eigen;
void LDR::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
@@ -62,11 +62,11 @@ void LDR::CopyFromParam (PARAM &cPar) {
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
@@ -77,7 +77,7 @@ void LDR::CopyToParam (PARAM &cPar) {
//X is a p by n matrix.
void LDR::VB (const vector > &Xt,
const gsl_matrix *W_gsl, const gsl_vector *y_gsl) {
-
+
// Save gsl_vector and gsl_matrix into Eigen library formats.
MatrixXd W(W_gsl->size1, W_gsl->size2);
VectorXd y(y_gsl->size);
@@ -105,6 +105,6 @@ void LDR::VB (const vector > &Xt,
// Save results.
// TO DO.
-
+
return;
}
diff --git a/src/ldr.h b/src/ldr.h
index ceb08cf..ab55fe2 100644
--- a/src/ldr.h
+++ b/src/ldr.h
@@ -1,22 +1,22 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
-#ifndef __LDR_H__
+#ifndef __LDR_H__
#define __LDR_H__
#include
@@ -29,16 +29,16 @@ using namespace std;
class LDR {
-public:
+public:
// IO-related parameters.
- int a_mode;
+ int a_mode;
size_t d_pace;
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
// Summary statistics.
size_t ni_total, ns_total; // Total number of individuals & SNPs.
size_t ni_test, ns_test; // Number of individuals & SNPs used
@@ -52,16 +52,16 @@ public:
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
vector indicator_snp;
-
+
vector snpInfo; // Record SNP information.
-
+
// Not included in PARAM.
- gsl_rng *gsl_r;
-
+ gsl_rng *gsl_r;
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
-
+
void VB(const vector > &Xt,
const gsl_matrix *W_gsl, const gsl_vector *y_gsl);
};
diff --git a/src/lm.cpp b/src/lm.cpp
index d3ad7f3..94729db 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -412,13 +412,13 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) {
string chr;
std::cout << "Warning: WJA hard coded SNP missingness " <<
"threshold of 10%" << std::endl;
-
+
// Start reading genotypes and analyze.
for (size_t t=0; t(unzipped_data[i*3+1])/32768.0;
bgen_geno_prob_BB=
static_cast(unzipped_data[i*3+2])/32768.0;
-
+
// WJA
bgen_geno_prob_non_miss=
bgen_geno_prob_AA +
diff --git a/src/lm.h b/src/lm.h
index fac84e1..cf428f0 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -54,7 +54,7 @@ public:
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
- vector indicator_snp;
+ vector indicator_snp;
vector snpInfo; // Record SNP information.
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 73a9232..2b5ca84 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -55,7 +55,7 @@ void LMM::CopyFromParam (PARAM &cPar) {
file_out=cPar.file_out;
path_out=cPar.path_out;
file_gene=cPar.file_gene;
-
+
// WJA added.
file_oxford=cPar.file_oxford;
@@ -228,12 +228,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode,
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
+
p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
gsl_matrix_set (Pab, p, index_ab, p_ab);
}
@@ -269,7 +269,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode,
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -277,7 +277,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode,
ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
+
p2_ab=ps2_ab+ps_aw*ps_bw*
ps2_ww/(ps_ww*ps_ww);
p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
@@ -318,7 +318,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode,
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -337,7 +337,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode,
p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*
ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/
(ps_ww*ps_ww);
-
+
gsl_matrix_set (PPPab, p, index_ab, p3_ab);
}
}
@@ -1479,7 +1479,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
break;
}
@@ -1487,7 +1487,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
ci_total++;
continue;
}
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {
gsl_vector_set(x, ci_test, 2);
@@ -1499,7 +1499,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
-
+
ci_total++;
ci_test++;
}
@@ -1678,7 +1678,7 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
ProgressBar ("Reading SNPs ", t, ns_total-1);
}
if (indicator_snp[t]==0) {continue;}
-
+
// Read SNP header.
id.clear();
rs.clear();
@@ -1752,14 +1752,14 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
gsl_vector_set_zero(x_miss);
for (size_t i=0; i(unzipped_data[i*3])/32768.0;
bgen_geno_prob_AB=
static_cast(unzipped_data[i*3+1])/32768.0;
bgen_geno_prob_BB=
static_cast(unzipped_data[i*3+2])/32768.0;
-
+
// WJA.
bgen_geno_prob_non_miss = bgen_geno_prob_AA +
bgen_geno_prob_AB+bgen_geno_prob_BB;
@@ -1768,13 +1768,13 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
n_miss++;
}
else {
-
+
bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
+
geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
+
gsl_vector_set(x, c_phen, geno);
gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
@@ -1962,7 +1962,7 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms,
}
}
else {
-
+
// If derivates change signs.
int status;
int iter=0, max_iter=100;
@@ -2010,11 +2010,11 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms,
status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1);
}
while (status==GSL_CONTINUE && itersize2+1);
-
+
// Start reading genotypes and analyze.
for (size_t t=0; t indicator_idv;
+ vector indicator_idv;
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
diff --git a/src/logistic.h b/src/logistic.h
index 7f9e133..b61ab14 100644
--- a/src/logistic.h
+++ b/src/logistic.h
@@ -1,75 +1,75 @@
-#ifndef LOGISTIC_H_
-#define LOGISTIC_H_
-
-// Mixed interface.
-void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with num. categories.
- gsl_matrix *Xc, // Continuous covariates matrix
- // Nobs x Kc
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
-
-int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_matrix *Xc, // Continuous covariates
- // matrix Nobs x Kc
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Reg. L1 0.0 if not used.
- double lambdaL2); // Reg. L2 0.0 if not used.
-
-double fLogit_mixed(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-// Categorical-only interface.
-void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
-
-int logistic_cat_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K .
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used
- double lambdaL2); // Regularization L2, 0 if not used
-
-double fLogit_cat(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-// Continuous-only interface.
-void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k-1) + Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *yhat);// Vector of prob. predicted
- // by the logistic.
-
-int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used.
- double lambdaL2); // Regularization L2, 0 if not used.
-
-double fLogit_cont(gsl_vector *beta,
- gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-#endif
+#ifndef LOGISTIC_H_
+#define LOGISTIC_H_
+
+// Mixed interface.
+void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with num. categories.
+ gsl_matrix *Xc, // Continuous covariates matrix
+ // Nobs x Kc
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
+
+int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_matrix *Xc, // Continuous covariates
+ // matrix Nobs x Kc
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Reg. L1 0.0 if not used.
+ double lambdaL2); // Reg. L2 0.0 if not used.
+
+double fLogit_mixed(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
+
+// Categorical-only interface.
+void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
+
+int logistic_cat_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K .
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Regularization L1, 0 if not used
+ double lambdaL2); // Regularization L2, 0 if not used
+
+double fLogit_cat(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
+
+// Continuous-only interface.
+void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1 + Sum_k(C_k-1) + Kc.
+ gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
+ gsl_vector *yhat);// Vector of prob. predicted
+ // by the logistic.
+
+int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Regularization L1, 0 if not used.
+ double lambdaL2); // Regularization L2, 0 if not used.
+
+double fLogit_cont(gsl_vector *beta,
+ gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
index b7ac884..c7f0573 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -25,12 +25,12 @@
using namespace std;
-int main(int argc, char * argv[]) {
- GEMMA cGemma;
+int main(int argc, char * argv[]) {
+ GEMMA cGemma;
PARAM cPar;
if (argc <= 1) {
- cGemma.PrintHeader();
+ cGemma.PrintHeader();
return EXIT_SUCCESS;
}
if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'h') {
@@ -46,27 +46,27 @@ int main(int argc, char * argv[]) {
if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'l') {
cGemma.PrintLicense();
return EXIT_SUCCESS;
- }
-
- cGemma.Assign(argc, argv, cPar);
+ }
+
+ cGemma.Assign(argc, argv, cPar);
ifstream check_dir((cPar.path_out).c_str());
if (!check_dir) {
mkdir((cPar.path_out).c_str(), S_IRWXU|S_IRGRP|S_IROTH);
- }
-
+ }
+
if (cPar.error==true) {return EXIT_FAILURE;}
-
+
if (cPar.mode_silence) {stringstream ss; cout.rdbuf (ss.rdbuf());}
-
+
cPar.CheckParam();
-
+
if (cPar.error==true) {return EXIT_FAILURE;}
-
+
cGemma.BatchRun(cPar);
-
+
if (cPar.error==true) {return EXIT_FAILURE;}
-
+
cGemma.WriteLog(argc, argv, cPar);
-
+
return EXIT_SUCCESS; }
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index c09b587..709bdde 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -375,7 +375,7 @@ double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
het_probs[i] /= sum;
double p_hwe = 0.0;
-
+
// p-value calculation for p_hwe.
for (i = 0; i <= rare_copies; i++)
{
diff --git a/src/param.cpp b/src/param.cpp
index a56f5eb..413d517 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -66,7 +66,7 @@ time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
// Read files: obtain ns_total, ng_total, ns_test, ni_test.
void PARAM::ReadFiles (void) {
string file_str;
-
+
// Read cat file.
if (!file_mcat.empty()) {
if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;}
@@ -216,7 +216,7 @@ void PARAM::ReadFiles (void) {
// If both fam file and pheno files are used, use
// phenotypes inside the pheno file.
if (!file_pheno.empty()) {
-
+
// Phenotype file before genotype file.
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
p_column)==false) {error=true;}
@@ -247,7 +247,7 @@ void PARAM::ReadFiles (void) {
// Read genotype and phenotype file for BIMBAM format.
if (!file_geno.empty()) {
-
+
// Annotation file before genotype file.
if (!file_anno.empty() ) {
if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
@@ -297,11 +297,11 @@ void PARAM::ReadFiles (void) {
if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
if (t==0) {
-
+
// If both fam file and pheno files are used, use
// phenotypes inside the pheno file.
if (!file_pheno.empty()) {
-
+
// Phenotype file before genotype file.
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
p_column)==false) {
@@ -347,7 +347,7 @@ void PARAM::ReadFiles (void) {
// Read genotype and phenotype file for multiple BIMBAM files.
if (!file_mgeno.empty()) {
-
+
// Annotation file before genotype file.
if (!file_anno.empty() ) {
if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
@@ -788,7 +788,7 @@ void PARAM::CheckParam (void) {
if (!file_bfile.empty()) {flag++;}
if (!file_geno.empty()) {flag++;}
if (!file_gene.empty()) {flag++;}
-
+
// WJA added.
if (!file_oxford.empty()) {flag++;}
@@ -982,7 +982,7 @@ void PARAM::CheckData (void) {
// WJA NOTE: I added this condition so that covariates can be added
// through sample, probably not exactly what is wanted.
- if(file_oxford.empty())
+ if(file_oxford.empty())
{
if ((file_cvt).empty() || (indicator_cvt).size()==0) {
n_cvt=1;
@@ -1017,7 +1017,7 @@ void PARAM::CheckData (void) {
"the number of individuals. "<.
*/
@@ -24,7 +24,7 @@
#include
#include
#include
-#include
+#include
#include
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
@@ -43,36 +43,36 @@ using namespace std;
void PRDT::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
- indicator_pheno=cPar.indicator_pheno;
+
+ indicator_pheno=cPar.indicator_pheno;
indicator_cvt=cPar.indicator_cvt;
indicator_idv=cPar.indicator_idv;
-
+
snpInfo=cPar.snpInfo;
mapRS2est=cPar.mapRS2est;
-
+
time_eigen=0;
-
+
n_ph=cPar.n_ph;
np_obs=cPar.np_obs;
np_miss=cPar.np_miss;
ns_total=cPar.ns_total;
- ns_test=0;
-
+ ns_test=0;
+
return;
}
void PRDT::CopyToParam (PARAM &cPar) {
cPar.ns_test=ns_test;
cPar.time_eigen=time_eigen;
-
+
return;
-}
+}
void PRDT::WriteFiles (gsl_vector *y_prdt) {
string file_str;
@@ -80,13 +80,13 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) {
file_str+=".";
file_str+="prdt";
file_str+=".txt";
-
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
cout<<"error writing file: "<size, ni_total=G->size1;
-
+
gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test);
- gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
+ gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
gsl_vector *eval=gsl_vector_alloc (ni_test);
gsl_vector *Utu=gsl_vector_alloc (ni_test);
gsl_vector *w=gsl_vector_alloc (ni_total);
gsl_permutation *pmt=gsl_permutation_alloc (ni_test);
-
+
//center matrix G based on indicator_idv
for (size_t i=0; isize; i++) {
d=gsl_vector_get(eval, i);
@@ -189,7 +189,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
}
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
-
+
// Free matrices.
gsl_matrix_free(Goo);
gsl_matrix_free(Gfo);
@@ -199,7 +199,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
gsl_vector_free(w);
gsl_permutation_free(pmt);
- return;
+ return;
}
void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
@@ -208,17 +208,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
cout<<"error reading genotype file:"<size);
gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size);
-
+
ns_test=0;
// Start reading genotypes and analyze.
@@ -227,24 +227,24 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
if (t%d_pace==0 || t==(ns_total-1)) {
ProgressBar ("Reading SNPs ", t, ns_total-1);
}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
+ ch_ptr=strtok (NULL, " , \t");
+
if (mapRS2est.count(rs)==0) {
continue;
} else {
effect_size=mapRS2est[rs];
}
-
+
x_mean=0.0;
c_phen=0;
n_miss=0;
x_train_mean=0;
n_train_nomiss=0;
-
+
gsl_vector_set_zero(x_miss);
for (size_t i=0; isize-n_miss);
x_train_mean/=(double)(n_train_nomiss);
-
-
+
+
for (size_t i=0; isize; ++i) {
geno=gsl_vector_get(x, i);
if (gsl_vector_get (x_miss, i)==0) {
@@ -291,17 +291,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
gsl_vector_scale (x, effect_size);
gsl_vector_add (y_prdt, x);
-
+
ns_test++;
- }
+ }
cout< b;
+ bitset<8> b;
string rs;
-
+
size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
double geno, x_mean, x_train_mean, effect_size;
-
+
gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-
+
// Calculate n_bit and c, the number of bit for each SNP.
if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
else {n_bit=indicator_idv.size()/4+1; }
-
+
// Print the first 3 magic numbers.
for (size_t i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
- }
-
+ }
+
ns_test=0;
-
+
for (vector::size_type t=0; tsize==n_miss) {
cout << "snp " << rs << " has missing genotype for all " <<
"individuals and will be ignored."<size-n_miss);
x_train_mean/=(double)(n_train_nomiss);
-
+
for (size_t i=0; isize; ++i) {
geno=gsl_vector_get(x, i);
if (geno==-9) {
@@ -425,47 +425,47 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
gsl_vector_set(x, i, geno-x_train_mean);
}
}
-
+
gsl_vector_scale (x, effect_size);
gsl_vector_add (y_prdt, x);
-
+
ns_test++;
- }
+ }
cout<::size_type i1=0; i1::size_type j1=0; j1::size_type i2=0;
i2::size_type j2=0;
j2::size_type i=0;
i::size_type j=0; j::size_type i=0;
i::size_type j=0; j.
*/
-#ifndef __PRDT_H__
+#ifndef __PRDT_H__
#define __PRDT_H__
#include
@@ -29,30 +29,30 @@
using namespace std;
class PRDT {
-
+
public:
// IO-related parameters.
size_t a_mode;
size_t d_pace;
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
vector > indicator_pheno;
vector indicator_cvt;
vector indicator_idv;
vector snpInfo;
map mapRS2est;
-
+
size_t n_ph;
size_t np_obs, np_miss;
size_t ns_total;
size_t ns_test;
-
+
double time_eigen;
-
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
diff --git a/src/varcov.cpp b/src/varcov.cpp
index 48a4fc5..46b5bf8 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -28,7 +28,7 @@
#include
#include
#include
-#include
+#include
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
@@ -47,22 +47,22 @@ using namespace std;
void VARCOV::CopyFromParam (PARAM &cPar) {
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
+
time_opt=0.0;
window_cm=cPar.window_cm;
window_bp=cPar.window_bp;
window_ns=cPar.window_ns;
-
+
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
@@ -82,7 +82,7 @@ void VARCOV::WriteCov (const int flag, const vector &snpInfo_sub,
if (flag==0) {
outfile.open (file_cov.c_str(), ofstream::out);
if (!outfile) {cout<<"error writing file: "< &snpInfo_sub,
}
}
}
-
+
outfile< &snpInfo_sort) {
size_t t2=0, n_nb=0;
for (size_t t=0; t > &X_mat, vector &cov_vec) {
}
return;
-}
+}
// Read the genotype file again, calculate r2 between pairs of SNPs
// within a window, output the file every 10K SNPs the output results
@@ -229,7 +229,7 @@ void VARCOV::AnalyzeBimbam () {
for (size_t i=0; isize; j++) {
x_vec[j]=gsl_vector_get(geno, j);
}
X_mat.push_back(x_vec);
t2++;
- }
-
+ }
+
n_nb=snpInfo[t].n_nb;
Calc_Cor(X_mat, cov_vec);
@@ -301,8 +301,8 @@ void VARCOV::AnalyzeBimbam () {
gsl_vector_free(geno);
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
@@ -314,7 +314,7 @@ void VARCOV::AnalyzePlink () {
// Calculate the number of right-hand-side neighbours for each SNP.
vector snpInfo_sub;
CalcNB(snpInfo);
-
+
size_t ni_test=0;
for (size_t i=0; isize; j++) {
x_vec[j]=gsl_vector_get(geno, j);
}
X_mat.push_back(x_vec);
t2++;
- }
-
+ }
+
n_nb=snpInfo[t].n_nb;
Calc_Cor(X_mat, cov_vec);
@@ -392,7 +392,7 @@ void VARCOV::AnalyzePlink () {
gsl_vector_free(geno);
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
diff --git a/src/varcov.h b/src/varcov.h
index 3b45123..4a1eb3a 100644
--- a/src/varcov.h
+++ b/src/varcov.h
@@ -16,7 +16,7 @@
along with this program. If not, see .
*/
-#ifndef __VARCOV_H__
+#ifndef __VARCOV_H__
#define __VARCOV_H__
#include "gsl/gsl_vector.h"
@@ -47,7 +47,7 @@ public:
double window_cm;
size_t window_bp;
size_t window_ns;
-
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
diff --git a/src/vc.cpp b/src/vc.cpp
index 9fe1894..e8ccece 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -175,7 +175,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1);
double sigma2;
-
+
// Calculate H = \sum_i^{k+1} \sigma_i^2 K_i.
gsl_matrix_set_zero (p->P);
for (size_t i=0; iP, K_temp);
}
-
+
// Calculate H^{-1}.
eigenlib_invert(p->P);
@@ -211,7 +211,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
// Calculate Py, KPy, PKPy.
gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
-
+
double d;
for (size_t i=0; iKPy_mat, i);
@@ -221,7 +221,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
gsl_vector_memcpy (&KPy.vector, p->Py);
} else {
gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
-
+
// Seems to be important to use gsl dgemv here instead of
// eigenlib_dgemv; otherwise.
gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0,
@@ -643,7 +643,7 @@ void ReadFile_cor (const string &file_cor, const set &setSnps,
}
while (!safeGetline(infile, line).eof()) {
-
+
//do not read cor values this time; upto col_n-1.
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -942,7 +942,7 @@ void ReadFile_cor (const string &file_cor, const vector &vec_rs,
ReadHeader_vc (line, header);
while (!safeGetline(infile, line).eof()) {
-
+
// Do not read cor values this time; upto col_n-1.
d_pos1=0; d_cm1=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -1115,7 +1115,7 @@ void ReadFile_cor (const string &file_cor, const vector &vec_rs,
mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor;
}
cout<1e-3 && it<100) {
-
+
// Update tau_inv.
gsl_blas_ddot (q_vec, pve, &d);
if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;}
@@ -2143,7 +2143,7 @@ bool PlinkXwz (const string &file_bed, const int display_pace,
gsl_vector_mul(wz, w);
int n_bit;
-
+
// Calculate n_bit and c, the number of bit for each snp.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
@@ -2170,7 +2170,7 @@ bool PlinkXwz (const string &file_bed, const int display_pace,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==ni_total) {
break;
}
@@ -2393,7 +2393,7 @@ bool PlinkXtXwz (const string &file_bed, const int display_pace,
if (indicator_snp[t]==0) {continue;}
// n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
+ infile.seekg(t*n_bit+3);
// Read genotypes.
geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
@@ -2402,7 +2402,7 @@ bool PlinkXtXwz (const string &file_bed, const int display_pace,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==ni_total) {
break;
}
--
cgit v1.2.3