aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Carbonetto2017-07-07 11:20:56 -0500
committerGitHub2017-07-07 11:20:56 -0500
commit86e96ede4ff0955bb2d03ac6c1bd7562a3984955 (patch)
tree33120540091e7d16b58f389a13949df397535912
parentb3747413e6c5c8cd447e979157880676da66a342 (diff)
parentb9758364059d52e153a9f1b4fcae3bc3f3e68422 (diff)
downloadpangemma-86e96ede4ff0955bb2d03ac6c1bd7562a3984955.tar.gz
Merge pull request #51 from genenetwork/spacing
Spacing fixes.
-rw-r--r--Makefile.linux10
-rw-r--r--Makefile.macosx8
-rw-r--r--README.md6
-rw-r--r--doc/manual.tex30
-rw-r--r--src/bslmm.cpp780
-rw-r--r--src/bslmm.h48
-rw-r--r--src/bslmmdap.cpp60
-rw-r--r--src/bslmmdap.h36
-rw-r--r--src/eigenlib.cpp4
-rw-r--r--src/eigenlib.h2
-rw-r--r--src/gemma.cpp46
-rw-r--r--src/gemma.h8
-rw-r--r--src/gzstream.cpp6
-rw-r--r--src/gzstream.h16
-rw-r--r--src/lapack.h2
-rw-r--r--src/ldr.cpp18
-rw-r--r--src/ldr.h26
-rw-r--r--src/lm.cpp6
-rw-r--r--src/lm.h2
-rw-r--r--src/lmm.cpp50
-rw-r--r--src/lmm.h4
-rw-r--r--src/logistic.h150
-rw-r--r--src/main.cpp30
-rw-r--r--src/mathfunc.cpp2
-rw-r--r--src/param.cpp24
-rw-r--r--src/param.h6
-rw-r--r--src/prdt.cpp188
-rw-r--r--src/prdt.h14
-rw-r--r--src/varcov.cpp56
-rw-r--r--src/varcov.h4
-rw-r--r--src/vc.cpp26
31 files changed, 834 insertions, 834 deletions
diff --git a/Makefile.linux b/Makefile.linux
index 0b2c0b9..b51bbfe 100644
--- a/Makefile.linux
+++ b/Makefile.linux
@@ -14,8 +14,8 @@ SYS = LNX
# Disable WITH_LAPACK option can slow computation speed significantly and is not recommended
# Disable WITH_ARPACK option only disable -apprx option in the software
WITH_LAPACK = 1
-FORCE_32BIT =
-FORCE_DYNAMIC =
+FORCE_32BIT =
+FORCE_DYNAMIC =
DIST_NAME = gemma-0.96alpha
# --------------------------------------------------------------------
@@ -45,7 +45,7 @@ OUTPUT = $(BIN_DIR)/gemma
SOURCES = $(SRC_DIR)/main.cpp
-HDR =
+HDR =
# Detailed libary paths, D for dynamic and S for static
LIBS_LNX_D_LAPACK = -llapack
@@ -54,7 +54,7 @@ LIBS_LNX_S_LAPACK = /software/atlas-3.10.3-el7-x86_64/lib/liblapack.a \
/software/atlas-3.10.3-el7-x86_64/lib/libcblas.a \
/software/atlas-3.10.3-el7-x86_64/lib/libf77blas.a \
/software/atlas-3.10.3-el7-x86_64/lib/libatlas.a -lgfortran \
- -Wl,--allow-multiple-definition
+ -Wl,--allow-multiple-definition
SOURCES += $(SRC_DIR)/param.cpp $(SRC_DIR)/gemma.cpp $(SRC_DIR)/io.cpp $(SRC_DIR)/lm.cpp $(SRC_DIR)/lmm.cpp $(SRC_DIR)/vc.cpp $(SRC_DIR)/mvlmm.cpp $(SRC_DIR)/bslmm.cpp $(SRC_DIR)/prdt.cpp $(SRC_DIR)/mathfunc.cpp $(SRC_DIR)/gzstream.cpp $(SRC_DIR)/eigenlib.cpp $(SRC_DIR)/ldr.cpp $(SRC_DIR)/bslmmdap.cpp $(SRC_DIR)/logistic.cpp $(SRC_DIR)/varcov.cpp
HDR += $(SRC_DIR)/param.h $(SRC_DIR)/gemma.h $(SRC_DIR)/io.h $(SRC_DIR)/lm.h $(SRC_DIR)/lmm.h $(SRC_DIR)/vc.h $(SRC_DIR)/mvlmm.h $(SRC_DIR)/bslmm.h $(SRC_DIR)/prdt.h $(SRC_DIR)/mathfunc.h $(SRC_DIR)/gzstream.h $(SRC_DIR)/eigenlib.h
@@ -91,7 +91,7 @@ $(OUTPUT): $(OBJS)
$(OBJS) : $(HDR)
-.cpp.o:
+.cpp.o:
$(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o
.SUFFIXES : .cpp .c .o $(SUFFIXES)
diff --git a/Makefile.macosx b/Makefile.macosx
index 992e442..00467d6 100644
--- a/Makefile.macosx
+++ b/Makefile.macosx
@@ -14,7 +14,7 @@ SYS = MAC
# Disable WITH_LAPACK option can slow computation speed significantly and is not recommended
# Disable WITH_ARPACK option only disable -apprx option in the software
WITH_LAPACK = 1
-FORCE_32BIT =
+FORCE_32BIT =
FORCE_DYNAMIC = 1
DIST_NAME = gemma-0.96
@@ -37,13 +37,13 @@ OUTPUT = $(BIN_DIR)/gemma
SOURCES = $(SRC_DIR)/main.cpp
-HDR =
+HDR =
# Detailed libary paths, D for dynamic and S for static
LIBS_LNX_D_LAPACK = -llapack
LIBS_MAC_D_LAPACK = -framework Accelerate
-LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition
+LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition
# Options
@@ -82,7 +82,7 @@ $(OUTPUT): $(OBJS)
$(OBJS) : $(HDR)
-.cpp.o:
+.cpp.o:
$(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o
.SUFFIXES : .cpp .c .o $(SUFFIXES)
diff --git a/README.md b/README.md
index bb6cf1e..a539cc0 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ MQS algorithm to estimate variance components.
If you use GEMMA for published work, please cite our paper:
+ Xiang Zhou and Matthew Stephens (2012). [Genome-wide efficient
-mixed-model analysis for association studies.](http://doi.org/10.1038/ng.2310)
+mixed-model analysis for association studies.](http://doi.org/10.1038/ng.2310)
*Nature Genetics* **44**, 821–824.
If you use the multivariate linear mixed model (mvLMM) in your
@@ -77,7 +77,7 @@ If you use the Bayesian sparse linear mixed model (BSLMM), please cite:
+ Xiang Zhou, Peter Carbonetto and Matthew Stephens (2013). [Polygenic
modeling with bayesian sparse linear mixed
models.](http://doi.org/10.1371/journal.pgen.1003264) *PLoS Genetics*
-**9**, e1003264.
+**9**, e1003264.
And if you use of the variance component estimation using summary
statistics, please cite:
@@ -118,7 +118,7 @@ This is the current structure of the GEMMA source repository:
subfolders; see Wilson et al "Good Enough Practices" paper for example
of this.*
-## Setup
+## Setup
There are two ways to install GEMMA:
diff --git a/doc/manual.tex b/doc/manual.tex
index d47aa22..c0119dc 100644
--- a/doc/manual.tex
+++ b/doc/manual.tex
@@ -100,11 +100,11 @@ available open-source numerical libraries.
Stephens (2014). Efficient multivariate linear mixed model
algorithms for genome-wide association studies. Nature Methods. 11:
407-409.
-
+
\item Bayesian sparse linear mixed models \\ Xiang Zhou, Peter
Carbonetto and Matthew Stephens (2013). Polygenic modeling with
Bayesian sparse linear mixed models. PLoS Genetics. 9(2): e1003264.
-
+
\item Variance component estimation with individual-level or summary
data\\ Xiang Zhou (2016). A unified framework for variance component
estimation with summary statistics in genome-wide association
@@ -212,10 +212,10 @@ effects terms ($\mathbf X\boldsymbol\beta$). These two parameters are
defined as follows:
\begin{align*}
\mbox{PVE}(\bbeta, \bu, \tau) &
- :=\frac{\mbox{V}(\bX\bbeta+\bu)}{\mbox{V}(\bX\bbeta+\bu)+\tau^{-1}}, \\
+ :=\frac{\mbox{V}(\bX\bbeta+\bu)}{\mbox{V}(\bX\bbeta+\bu)+\tau^{-1}}, \\
\mbox{PGE}(\bbeta, \bu)
& :=\frac{\mbox{V}(\bX\bbeta)}{\mbox{V}(\bX\bbeta+\bu)}, \\
-\intertext{where}
+\intertext{where}
\mbox{V}({\bf x}) &:=\frac{1}{n}\sum_{i=1}^n (x_i-\overline {x})^2.
\end{align*}
@@ -429,7 +429,7 @@ BIMBAM mean genotype file from IMPUTE genotype files
(\url{http://www.stats.ox.ac.uk/~marchini/software/gwas/file_format.html})\cite{Howie:2009}:
%
\begin{verbatim}
-cat [impute filename] | awk -v s=[number of samples/individuals]
+cat [impute filename] | awk -v s=[number of samples/individuals]
'{ printf $2 "," $4 "," $5; for(i=1; i<=s; i++) printf "," $(i*3+3)*2+$(i*3+4); printf "\n" }'
> [bimbam filename]
\end{verbatim}
@@ -815,7 +815,7 @@ chr rs ps n_mis n_obs allele1 allele0 af beta se
\end{verbatim}
%
-The 11 columns are: chromosome numbers, snp ids, base pair positions on the chromosome, number of missing individuals for a given snp, number of non-missing individuals for a given snp, minor allele, major allele, allele frequency, beta estimates, standard errors for beta, and $p$ values from the Wald test.
+The 11 columns are: chromosome numbers, snp ids, base pair positions on the chromosome, number of missing individuals for a given snp, number of non-missing individuals for a given snp, minor allele, major allele, allele frequency, beta estimates, standard errors for beta, and $p$ values from the Wald test.
\subsection{Estimate Relatedness Matrix from Genotypes}
@@ -1009,7 +1009,7 @@ ped format or the BIMBAM format are:
\begin{verbatim}
./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] [num3] -o [prefix]
-./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num]
+./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num]
-n [num1] [num2] [num3] -o [prefix]
\end{verbatim}
@@ -1041,7 +1041,7 @@ missing, one can impute these missing values before association tests:
\begin{verbatim}
./gemma -bfile [prefix] -k [filename] -predict -n [num1] [num2] [num3] -o [prefix]
-./gemma -g [filename] -p [filename] -a [filename] -k [filename] -predict
+./gemma -g [filename] -p [filename] -a [filename] -k [filename] -predict
-n [num1] [num2] [num3] -o [prefix]
\end{verbatim}
@@ -1154,10 +1154,10 @@ below:
%
\begin{verbatim}
h pve rho pge pi n_gamma
-4.777635e-01 5.829042e-01 4.181280e-01 4.327976e-01 2.106763e-03 25
-5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
-5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
-6.361674e-01 6.461678e-01 3.130355e-01 3.659850e-01 2.188401e-03 25
+4.777635e-01 5.829042e-01 4.181280e-01 4.327976e-01 2.106763e-03 25
+5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
+5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26
+6.361674e-01 6.461678e-01 3.130355e-01 3.659850e-01 2.188401e-03 25
5.479237e-01 6.228036e-01 3.231856e-01 4.326231e-01 2.164183e-03 27
\end{verbatim}
%
@@ -1196,9 +1196,9 @@ The basic usages for association analysis with either the PLINK binary
ped format or the BIMBAM format are:
\begin{verbatim}
-./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename]
+./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename]
-predict [num] -o [prefix]
-./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename]
+./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename]
-k [filename] -predict [num] -o [prefix]
\end{verbatim}
@@ -1544,7 +1544,7 @@ source code for detailed examples.
\item \textcolor{red}{-ci [num]} \quad specify fitting algorithm to compute the standard errors. (default 1; valid value 1-2; 1: MQS-HEW; 2: MQS-LDW.)
\end{itemize}
%
-
+
\clearpage
\bibliographystyle{plain}
diff --git a/src/bslmm.cpp b/src/bslmm.cpp
index 563b743..d579802 100644
--- a/src/bslmm.cpp
+++ b/src/bslmm.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -24,7 +24,7 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <ctime>
#include <cstring>
#include <algorithm>
@@ -50,32 +50,32 @@ using namespace std;
void BSLMM::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
- l_min=cPar.h_min;
- l_max=cPar.h_max;
- n_region=cPar.n_region;
+
+ l_min=cPar.h_min;
+ l_max=cPar.h_max;
+ n_region=cPar.n_region;
pve_null=cPar.pve_null;
pheno_mean=cPar.pheno_mean;
-
+
time_UtZ=0.0;
time_Omega=0.0;
n_accept=0;
-
- h_min=cPar.h_min;
- h_max=cPar.h_max;
+
+ h_min=cPar.h_min;
+ h_max=cPar.h_max;
h_scale=cPar.h_scale;
- rho_min=cPar.rho_min;
- rho_max=cPar.rho_max;
+ rho_min=cPar.rho_min;
+ rho_max=cPar.rho_max;
rho_scale=cPar.rho_scale;
- logp_min=cPar.logp_min;
- logp_max=cPar.logp_max;
+ logp_min=cPar.logp_min;
+ logp_max=cPar.logp_max;
logp_scale=cPar.logp_scale;
-
+
s_min=cPar.s_min;
s_max=cPar.s_max;
w_step=cPar.w_step;
@@ -86,17 +86,17 @@ void BSLMM::CopyFromParam (PARAM &cPar) {
geo_mean=cPar.geo_mean;
randseed=cPar.randseed;
trace_G=cPar.trace_G;
-
+
ni_total=cPar.ni_total;
ns_total=cPar.ns_total;
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
@@ -108,7 +108,7 @@ void BSLMM::CopyToParam (PARAM &cPar) {
cPar.n_accept=n_accept;
cPar.pheno_mean=pheno_mean;
cPar.randseed=randseed;
-
+
return;
}
@@ -119,28 +119,28 @@ void BSLMM::WriteBV (const gsl_vector *bv) {
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
+ cout<<"error writing file: "<<file_str.c_str()<<endl;
return;
}
-
+
size_t t=0;
for (size_t i=0; i<ni_total; ++i) {
if (indicator_idv[i]==0) {
outfile<<"NA"<<endl;
- }
+ }
else {
outfile<<scientific<<setprecision(6)<<
gsl_vector_get(bv, t)<<endl;
t++;
}
- }
-
- outfile.clear();
- outfile.close();
+ }
+
+ outfile.clear();
+ outfile.close();
return;
}
-void BSLMM::WriteParam (vector<pair<double, double> > &beta_g,
+void BSLMM::WriteParam (vector<pair<double, double> > &beta_g,
const gsl_vector *alpha, const size_t w) {
string file_str;
file_str=path_out+"/"+file_out;
@@ -148,20 +148,20 @@ void BSLMM::WriteParam (vector<pair<double, double> > &beta_g,
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
+ cout<<"error writing file: "<<file_str.c_str()<<endl;
return;}
-
+
outfile<<"chr"<<"\t"<<"rs"<<"\t"
<<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
<<"beta"<<"\t"<<"gamma"<<endl;
-
+
size_t t=0;
for (size_t i=0; i<ns_total; ++i) {
- if (indicator_snp[i]==0) {continue;}
-
+ if (indicator_snp[i]==0) {continue;}
+
outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"
<<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";
-
+
outfile<<scientific<<setprecision(6)<<
gsl_vector_get(alpha, t)<<"\t";
if (beta_g[t].second!=0) {
@@ -172,10 +172,10 @@ void BSLMM::WriteParam (vector<pair<double, double> > &beta_g,
outfile<<0.0<<"\t"<<0.0<<endl;
}
t++;
- }
-
- outfile.clear();
- outfile.close();
+ }
+
+ outfile.clear();
+ outfile.close();
return;
}
@@ -186,17 +186,17 @@ void BSLMM::WriteParam (const gsl_vector *alpha) {
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
- cout<<"error writing file: "<<file_str.c_str()<<endl;
+ cout<<"error writing file: "<<file_str.c_str()<<endl;
return;
}
-
+
outfile<<"chr"<<"\t"<<"rs"<<"\t"
<<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t"
<<"beta"<<"\t"<<"gamma"<<endl;
-
+
size_t t=0;
for (size_t i=0; i<ns_total; ++i) {
- if (indicator_snp[i]==0) {continue;}
+ if (indicator_snp[i]==0) {continue;}
outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<<
snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t";
@@ -204,14 +204,14 @@ void BSLMM::WriteParam (const gsl_vector *alpha) {
gsl_vector_get(alpha, t)<<"\t";
outfile<<0.0<<"\t"<<0.0<<endl;
t++;
- }
-
- outfile.clear();
- outfile.close();
+ }
+
+ outfile.clear();
+ outfile.close();
return;
}
-void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
+void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
const gsl_matrix *Result_gamma, const size_t w_col) {
string file_gamma, file_hyp;
file_gamma=path_out+"/"+file_out;
@@ -220,21 +220,21 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
file_hyp+=".hyp.txt";
ofstream outfile_gamma, outfile_hyp;
-
+
if (flag==0) {
outfile_gamma.open (file_gamma.c_str(), ofstream::out);
outfile_hyp.open (file_hyp.c_str(), ofstream::out);
if (!outfile_gamma) {
- cout<<"error writing file: "<<file_gamma<<endl;
+ cout<<"error writing file: "<<file_gamma<<endl;
return;
}
if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
+ cout<<"error writing file: "<<file_hyp<<endl;
return;
}
-
+
outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl;
-
+
for (size_t i=0; i<s_max; ++i) {
outfile_gamma<<"s"<<i<<"\t";
}
@@ -244,18 +244,18 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
outfile_gamma.open (file_gamma.c_str(), ofstream::app);
outfile_hyp.open (file_hyp.c_str(), ofstream::app);
if (!outfile_gamma) {
- cout<<"error writing file: "<<file_gamma<<endl;
+ cout<<"error writing file: "<<file_gamma<<endl;
return;
}
if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
+ cout<<"error writing file: "<<file_hyp<<endl;
return;
}
-
+
size_t w;
if (w_col==0) {w=w_pace;}
else {w=w_col;}
-
+
for (size_t i=0; i<w; ++i) {
outfile_hyp<<scientific;
for (size_t j=0; j<4; ++j) {
@@ -267,7 +267,7 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
outfile_hyp<<(int)gsl_matrix_get(Result_hyp,i,5)<<"\t";
outfile_hyp<<endl;
}
-
+
for (size_t i=0; i<w; ++i) {
for (size_t j=0; j<s_max; ++j) {
outfile_gamma<<
@@ -275,13 +275,13 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp,
}
outfile_gamma<<endl;
}
-
+
}
-
+
outfile_hyp.close();
outfile_hyp.clear();
outfile_gamma.close();
- outfile_gamma.clear();
+ outfile_gamma.clear();
return;
}
@@ -300,7 +300,7 @@ void BSLMM::CalcPgamma (double *p_gamma) {
return;
}
-void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
+void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
vector<size_t> &rank) {
size_t pos;
for (size_t i=0; i<rank.size(); ++i) {
@@ -309,32 +309,32 @@ void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X,
gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos);
gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector);
}
-
+
return;
}
-double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
+double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
const double sigma_a2) {
- double pve, var_y;
-
+ double pve, var_y;
+
gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2);
gsl_vector *OiXty=gsl_vector_alloc (UtXgamma->size2);
gsl_matrix_set_identity (Omega);
- gsl_matrix_scale (Omega, 1.0/sigma_a2);
+ gsl_matrix_scale (Omega, 1.0/sigma_a2);
lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma,
1.0, Omega);
gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
CholeskySolve(Omega, Xty, OiXty);
-
+
gsl_blas_ddot (Xty, OiXty, &pve);
gsl_blas_ddot (Uty, Uty, &var_y);
-
+
pve/=var_y;
-
+
gsl_matrix_free (Omega);
gsl_vector_free (Xty);
gsl_vector_free (OiXty);
@@ -342,28 +342,28 @@ double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
return pve;
}
-void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
- vector<size_t> &rank, class HYPBSLMM &cHyp,
+void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
+ vector<size_t> &rank, class HYPBSLMM &cHyp,
vector<pair<size_t, double> > &pos_loglr) {
double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1);
-
+
cHyp.n_gamma=0;
for (size_t i=0; i<pos_loglr.size(); ++i) {
if (2.0*pos_loglr[i].second>q_genome) {cHyp.n_gamma++;}
}
if (cHyp.n_gamma<10) {cHyp.n_gamma=10;}
-
+
if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;}
- if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;}
-
+ if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;}
+
rank.clear();
for (size_t i=0; i<cHyp.n_gamma; ++i) {
rank.push_back(i);
}
-
+
cHyp.logp=log((double)cHyp.n_gamma/(double)ns_test);
- cHyp.h=pve_null;
-
+ cHyp.h=pve_null;
+
if (cHyp.logp==0) {cHyp.logp=-0.000001;}
if (cHyp.h==0) {cHyp.h=0.1;}
@@ -376,114 +376,114 @@ void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
} else {
sigma_a2=cHyp.h*1.0/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
}
- if (sigma_a2==0) {sigma_a2=0.025;}
+ if (sigma_a2==0) {sigma_a2=0.025;}
cHyp.rho=CalcPveLM (UtXgamma, Uty, sigma_a2)/cHyp.h;
gsl_matrix_free (UtXgamma);
-
+
if (cHyp.rho>1.0) {cHyp.rho=1.0;}
-
+
if (cHyp.h<h_min) {cHyp.h=h_min;}
if (cHyp.h>h_max) {cHyp.h=h_max;}
if (cHyp.rho<rho_min) {cHyp.rho=rho_min;}
if (cHyp.rho>rho_max) {cHyp.rho=rho_max;}
if (cHyp.logp<logp_min) {cHyp.logp=logp_min;}
if (cHyp.logp>logp_max) {cHyp.logp=logp_max;}
-
+
cout<<"initial value of h = "<<cHyp.h<<endl;
cout<<"initial value of rho = "<<cHyp.rho<<endl;
cout<<"initial value of pi = "<<exp(cHyp.logp)<<endl;
cout<<"initial value of |gamma| = "<<cHyp.n_gamma<<endl;
-
+
return;
}
-double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval,
- gsl_vector *Utu, gsl_vector *alpha_prime,
+double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval,
+ gsl_vector *Utu, gsl_vector *alpha_prime,
class HYPBSLMM &cHyp) {
double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h));
-
- gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size);
+
+ gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size);
gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
-
+
double logpost=0.0;
double d, ds, uy, Hi_yy=0, logdet_H=0.0;
for (size_t i=0; i<ni_test; ++i) {
d=gsl_vector_get (K_eval, i)*sigma_b2;
ds=d/(d+1.0);
- d=1.0/(d+1.0);
+ d=1.0/(d+1.0);
gsl_vector_set (weight_Hi, i, d);
-
+
logdet_H-=log(d);
uy=gsl_vector_get (Uty, i);
Hi_yy+=d*uy*uy;
-
- gsl_vector_set (Utu_rand, i,
+
+ gsl_vector_set (Utu_rand, i,
gsl_ran_gaussian(gsl_r, 1)*sqrt(ds));
}
-
+
// Sample tau.
double tau=1.0;
if (a_mode==11) {
- tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy);
+ tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy);
}
-
+
// Sample alpha.
gsl_vector_memcpy (alpha_prime, Uty);
gsl_vector_mul (alpha_prime, weight_Hi);
gsl_vector_scale (alpha_prime, sigma_b2);
-
+
// Sample u.
gsl_vector_memcpy (Utu, alpha_prime);
gsl_vector_mul (Utu, K_eval);
if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
- gsl_vector_add (Utu, Utu_rand);
-
+ gsl_vector_add (Utu, Utu_rand);
+
// For quantitative traits, calculate pve and ppe.
if (a_mode==11) {
gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=d/(double)ni_test;
+ cHyp.pve=d/(double)ni_test;
cHyp.pve/=cHyp.pve+1.0/tau;
- cHyp.pge=0.0;
+ cHyp.pge=0.0;
}
// Calculate likelihood.
logpost=-0.5*logdet_H;
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(Hi_yy);}
else {logpost-=0.5*Hi_yy;}
-
+
logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
-
+
gsl_vector_free (Utu_rand);
gsl_vector_free (weight_Hi);
-
+
return logpost;
}
-double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- gsl_vector *UtXb, gsl_vector *Utu,
- gsl_vector *alpha_prime, gsl_vector *beta,
+double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ gsl_vector *UtXb, gsl_vector *Utu,
+ gsl_vector *alpha_prime, gsl_vector *beta,
class HYPBSLMM &cHyp) {
- clock_t time_start;
-
+ clock_t time_start;
+
double sigma_a2=cHyp.h*cHyp.rho/
(trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h));
-
+
double logpost=0.0;
double d, ds, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
-
- gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
- UtXgamma->size2);
+
+ gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
+ UtXgamma->size2);
gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2);
- gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1);
+ gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1);
gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1);
-
+
gsl_matrix_memcpy (UtXgamma_eval, UtXgamma);
-
+
logdet_H=0.0; P_yy=0.0;
for (size_t i=0; i<ni_test; ++i) {
gsl_vector_view UtXgamma_row=
@@ -492,139 +492,139 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma,
ds=d/(d+1.0);
d=1.0/(d+1.0);
gsl_vector_set (weight_Hi, i, d);
-
+
logdet_H-=log(d);
uy=gsl_vector_get (Uty, i);
P_yy+=d*uy*uy;
gsl_vector_scale (&UtXgamma_row.vector, d);
-
+
gsl_vector_set(Utu_rand,i,gsl_ran_gaussian(gsl_r,1)*sqrt(ds));
}
-
+
// Calculate Omega.
gsl_matrix_set_identity (Omega);
-
+
time_start=clock();
lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval,
UtXgamma, 1.0, Omega);
time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
-
+
+
// Calculate beta_hat.
gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
logdet_O=CholeskySolve(Omega, XtHiy, beta_hat);
-
+
gsl_vector_scale (beta_hat, sigma_a2);
gsl_blas_ddot (XtHiy, beta_hat, &d);
P_yy-=d;
-
+
// Sample tau.
double tau=1.0;
if (a_mode==11) {
- tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy);
+ tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy);
}
// Sample beta.
for (size_t i=0; i<beta->size; i++)
{
- d=gsl_ran_gaussian(gsl_r, 1);
- gsl_vector_set(beta, i, d);
+ d=gsl_ran_gaussian(gsl_r, 1);
+ gsl_vector_set(beta, i, d);
}
- gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
-
+ gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
+
// This computes inv(L^T(Omega)) %*% beta.
gsl_vector_scale(beta, sqrt(sigma_a2/tau));
- gsl_vector_add(beta, beta_hat);
+ gsl_vector_add(beta, beta_hat);
gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
-
+
// Sample alpha.
gsl_vector_memcpy (alpha_prime, Uty);
gsl_vector_sub (alpha_prime, UtXb);
gsl_vector_mul (alpha_prime, weight_Hi);
gsl_vector_scale (alpha_prime, sigma_b2);
-
+
// Sample u.
gsl_vector_memcpy (Utu, alpha_prime);
gsl_vector_mul (Utu, K_eval);
-
+
if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));}
- gsl_vector_add (Utu, Utu_rand);
-
+ gsl_vector_add (Utu, Utu_rand);
+
// For quantitative traits, calculate pve and pge.
if (a_mode==11) {
gsl_blas_ddot (UtXb, UtXb, &d);
cHyp.pge=d/(double)ni_test;
-
+
gsl_blas_ddot (Utu, Utu, &d);
cHyp.pve=cHyp.pge+d/(double)ni_test;
-
+
if (cHyp.pve==0) {cHyp.pge=0.0;}
else {cHyp.pge/=cHyp.pve;}
- cHyp.pve/=cHyp.pve+1.0/tau;
- }
+ cHyp.pve/=cHyp.pve+1.0/tau;
+ }
gsl_matrix_free (UtXgamma_eval);
gsl_matrix_free (Omega);
gsl_vector_free (XtHiy);
gsl_vector_free (beta_hat);
- gsl_vector_free (Utu_rand);
+ gsl_vector_free (Utu_rand);
gsl_vector_free (weight_Hi);
-
+
logpost=-0.5*logdet_H-0.5*logdet_O;
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
else {logpost-=0.5*P_yy;}
logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp));
-
+
return logpost;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
+void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu,
gsl_vector *z_hat, class HYPBSLMM &cHyp) {
double d;
-
+
gsl_blas_ddot (Utu, Utu, &d);
- cHyp.pve=d/(double)ni_test;
-
+ cHyp.pve=d/(double)ni_test;
+
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
-
+
cHyp.pve/=cHyp.pve+1.0;
- cHyp.pge=0.0;
-
+ cHyp.pge=0.0;
+
return;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
- const gsl_vector *Utu, gsl_vector *z_hat,
+void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb,
+ const gsl_vector *Utu, gsl_vector *z_hat,
class HYPBSLMM &cHyp) {
double d;
gsl_vector *UtXbU=gsl_vector_alloc (Utu->size);
-
+
gsl_blas_ddot (UtXb, UtXb, &d);
cHyp.pge=d/(double)ni_test;
-
+
gsl_blas_ddot (Utu, Utu, &d);
cHyp.pve=cHyp.pge+d/(double)ni_test;
-
+
gsl_vector_memcpy (UtXbU, Utu);
gsl_vector_add (UtXbU, UtXb);
- gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
-
+ gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
+
if (cHyp.pve==0) {cHyp.pge=0.0;}
else {cHyp.pge/=cHyp.pve;}
-
+
cHyp.pve/=cHyp.pve+1.0;
-
+
gsl_vector_free(UtXbU);
return;
}
-void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
- gsl_vector *z) {
+void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
+ gsl_vector *z) {
double d1, d2, z_rand=0.0;
for (size_t i=0; i<z->size; ++i) {
d1=gsl_vector_get (y, i);
@@ -634,7 +634,7 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
if (d1<=0.0) {
// Control, right truncated.
- do {
+ do {
z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
} while (z_rand>0.0);
}
@@ -643,25 +643,25 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat,
z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0);
} while (z_rand<0.0);
}
-
+
gsl_vector_set (z, i, z_rand);
}
return;
}
-double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
+double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
-
+
double h=cHyp_old.h, rho=cHyp_old.rho;
-
+
double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale;
-
+
for (size_t i=0; i<repeat; ++i) {
h=h+(gsl_rng_uniform(gsl_r)-0.5)*d_h;
if (h<h_min) {h=2*h_min-h;}
if (h>h_max) {h=2*h_max-h;}
-
+
rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho;
if (rho<rho_min) {rho=2*rho_min-rho;}
if (rho>rho_max) {rho=2*rho_max-rho;}
@@ -671,13 +671,13 @@ double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old,
return 0.0;
}
-double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
+double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp;
double log_ratio=0.0;
-
+
double d_logp=min(0.1, (logp_max-logp_min)*logp_scale);
-
+
for (size_t i=0; i<repeat; ++i) {
logp_new=logp_old+(gsl_rng_uniform(gsl_r)-0.5)*d_logp;
if (logp_new<logp_min) {logp_new=2*logp_min-logp_new;}
@@ -686,29 +686,29 @@ double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old,
logp_old=logp_new;
}
cHyp_new.logp=logp_new;
-
+
return log_ratio;
}
bool comp_vec (size_t a, size_t b) {
- return (a < b);
+ return (a < b);
}
-double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
- vector<size_t> &rank_new,
- const double *p_gamma,
- const class HYPBSLMM &cHyp_old,
- class HYPBSLMM &cHyp_new,
+double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
+ vector<size_t> &rank_new,
+ const double *p_gamma,
+ const class HYPBSLMM &cHyp_old,
+ class HYPBSLMM &cHyp_new,
const size_t &repeat) {
map<size_t, int> mapRank2in;
size_t r;
double unif, logp=0.0;
int flag_gamma;
size_t r_add, r_remove, col_id;
-
+
rank_new.clear();
if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"<<endl;}
-
+
if (cHyp_old.n_gamma!=0) {
for (size_t i=0; i<rank_old.size(); ++i) {
r=rank_old[i];
@@ -716,29 +716,29 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
mapRank2in[r]=1;
}
}
- cHyp_new.n_gamma=cHyp_old.n_gamma;
-
+ cHyp_new.n_gamma=cHyp_old.n_gamma;
+
for (size_t i=0; i<repeat; ++i) {
- unif=gsl_rng_uniform(gsl_r);
-
+ unif=gsl_rng_uniform(gsl_r);
+
if (unif < 0.40 && cHyp_new.n_gamma<s_max) {flag_gamma=1;}
- else if (unif>=0.40 && unif < 0.80 &&
+ else if (unif>=0.40 && unif < 0.80 &&
cHyp_new.n_gamma>s_min) {
flag_gamma=2;
}
- else if (unif>=0.80 && cHyp_new.n_gamma>0 &&
+ else if (unif>=0.80 && cHyp_new.n_gamma>0 &&
cHyp_new.n_gamma<ns_test) {
flag_gamma=3;
}
else {flag_gamma=4;}
-
+
if(flag_gamma==1) {
// Add a SNP.
do {
r_add=gsl_ran_discrete (gsl_r, gsl_t);
- } while (mapRank2in.count(r_add)!=0);
-
+ } while (mapRank2in.count(r_add)!=0);
+
double prob_total=1.0;
for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
r=rank_new[i];
@@ -756,14 +756,14 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
// Delete a SNP.
col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
r_remove=rank_new[col_id];
-
+
double prob_total=1.0;
for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
r=rank_new[i];
prob_total-=p_gamma[r];
}
prob_total+=p_gamma[r_remove];
-
+
mapRank2in.erase(r_remove);
rank_new.erase(rank_new.begin()+col_id);
logp+=log(p_gamma[r_remove]/prob_total)+
@@ -779,18 +779,18 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
// Be careful with the proposal.
do {
r_add=gsl_ran_discrete (gsl_r, gsl_t);
- } while (mapRank2in.count(r_add)!=0);
-
+ } while (mapRank2in.count(r_add)!=0);
+
double prob_total=1.0;
for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
r=rank_new[i];
prob_total-=p_gamma[r];
}
-
+
logp+=log(p_gamma[r_remove]/
(prob_total+p_gamma[r_remove]-p_gamma[r_add]));
logp-=log(p_gamma[r_add]/prob_total);
-
+
mapRank2in.erase(r_remove);
mapRank2in[r_add]=1;
rank_new.erase(rank_new.begin()+col_id);
@@ -798,7 +798,7 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
}
else {logp+=0;} // Do not change.
}
-
+
stable_sort (rank_new.begin(), rank_new.end(), comp_vec);
mapRank2in.clear();
@@ -806,54 +806,54 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old,
}
bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) {
- return (a.second > b.second);
+ return (a.second > b.second);
}
// If a_mode==13 then Uty==y.
-void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
+void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y) {
- clock_t time_start;
+ clock_t time_start;
class HYPBSLMM cHyp_old, cHyp_new;
-
+
gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
- gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
- gsl_vector *alpha_prime=gsl_vector_alloc (ni_test);
+ gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
+
+ gsl_vector *alpha_prime=gsl_vector_alloc (ni_test);
gsl_vector *alpha_new=gsl_vector_alloc (ni_test);
- gsl_vector *alpha_old=gsl_vector_alloc (ni_test);
+ gsl_vector *alpha_old=gsl_vector_alloc (ni_test);
gsl_vector *Utu=gsl_vector_alloc (ni_test);
gsl_vector *Utu_new=gsl_vector_alloc (ni_test);
gsl_vector *Utu_old=gsl_vector_alloc (ni_test);
-
+
gsl_vector *UtXb_new=gsl_vector_alloc (ni_test);
gsl_vector *UtXb_old=gsl_vector_alloc (ni_test);
-
+
gsl_vector *z_hat=gsl_vector_alloc (ni_test);
gsl_vector *z=gsl_vector_alloc (ni_test);
- gsl_vector *Utz=gsl_vector_alloc (ni_test);
+ gsl_vector *Utz=gsl_vector_alloc (ni_test);
+
+ gsl_vector_memcpy (Utz, Uty);
- gsl_vector_memcpy (Utz, Uty);
-
double logPost_new, logPost_old;
double logMHratio;
- double mean_z=0.0;
-
+ double mean_z=0.0;
+
gsl_matrix_set_zero (Result_gamma);
gsl_vector_set_zero (Utu);
gsl_vector_set_zero (alpha_prime);
if (a_mode==13) {
pheno_mean=0.0;
}
-
+
vector<pair<double, double> > beta_g;
for (size_t i=0; i<ns_test; i++) {
beta_g.push_back(make_pair(0.0, 0.0));
}
-
+
vector<size_t> rank_new, rank_old;
- vector<double> beta_new, beta_old;
+ vector<double> beta_new, beta_old;
vector<pair<size_t, double> > pos_loglr;
@@ -865,59 +865,59 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
for (size_t i=0; i<ns_test; ++i) {
mapRank2pos[i]=pos_loglr[i].first;
}
-
+
// Calculate proposal distribution for gamma (unnormalized),
// and set up gsl_r and gsl_t.
gsl_rng_env_setup();
const gsl_rng_type * gslType;
- gslType = gsl_rng_default;
+ gslType = gsl_rng_default;
if (randseed<0)
{
time_t rawtime;
time (&rawtime);
tm * ptm = gmtime (&rawtime);
-
+
randseed = (unsigned) (ptm->tm_hour%24*3600+
ptm->tm_min*60+ptm->tm_sec);
}
- gsl_r = gsl_rng_alloc(gslType);
+ gsl_r = gsl_rng_alloc(gslType);
gsl_rng_set(gsl_r, randseed);
-
- double *p_gamma = new double[ns_test];
+
+ double *p_gamma = new double[ns_test];
CalcPgamma (p_gamma);
-
+
gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
+
// Initial parameters.
InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr);
-
+
cHyp_initial=cHyp_old;
-
+
if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
- logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old,
+ logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old,
cHyp_old);
beta_old.clear();
for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
beta_old.push_back(0);
- }
+ }
}
else {
- gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test,
+ gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test,
cHyp_old.n_gamma);
gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma);
- SetXgamma (UtXgamma, UtX, rank_old);
- logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old,
+ SetXgamma (UtXgamma, UtX, rank_old);
+ logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old,
Utu_old, alpha_old, beta, cHyp_old);
-
+
beta_old.clear();
for (size_t i=0; i<beta->size; ++i) {
beta_old.push_back(gsl_vector_get(beta, i));
- }
+ }
gsl_matrix_free (UtXgamma);
gsl_vector_free (beta);
- }
-
+ }
+
// Calculate centered z_hat, and pve.
if (a_mode==13) {
time_start=clock();
@@ -929,28 +929,28 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
}
time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
}
-
+
// Start MCMC.
int accept;
size_t total_step=w_step+s_step;
size_t w=0, w_col, pos;
size_t repeat=0;
-
+
for (size_t t=0; t<total_step; ++t) {
if (t%d_pace==0 || t==total_step-1) {
ProgressBar ("Running MCMC ", t, total_step-1,
(double)n_accept/(double)(t*n_mh+1));
}
- if (a_mode==13) {
- SampleZ (y, z_hat, z);
- mean_z=CenterVector (z);
-
+ if (a_mode==13) {
+ SampleZ (y, z_hat, z);
+ mean_z=CenterVector (z);
+
time_start=clock();
gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz);
time_UtZ+=(clock()-time_start)/
(double(CLOCKS_PER_SEC)*60.0);
-
+
// First proposal.
if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) {
logPost_old=
@@ -959,7 +959,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
beta_old.clear();
for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
beta_old.push_back(0);
- }
+ }
}
else {
gsl_matrix *UtXgamma=
@@ -971,7 +971,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
CalcPosterior(UtXgamma, Utz, K_eval,
UtXb_old, Utu_old, alpha_old,
beta, cHyp_old);
-
+
beta_old.clear();
for (size_t i=0; i<beta->size; ++i) {
beta_old.push_back(gsl_vector_get(beta, i));
@@ -980,7 +980,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
gsl_vector_free (beta);
}
}
-
+
// M-H steps.
for (size_t i=0; i<n_mh; ++i) {
if (gsl_rng_uniform(gsl_r)<0.33) {
@@ -989,20 +989,20 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
else {
repeat=1;
}
-
+
logMHratio=0.0;
logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat);
logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma,
cHyp_old, cHyp_new, repeat);
logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat);
-
+
if (cHyp_new.n_gamma==0 || cHyp_new.rho==0) {
logPost_new=CalcPosterior(Utz, K_eval, Utu_new,
alpha_new, cHyp_new);
beta_new.clear();
for (size_t i=0; i<cHyp_new.n_gamma; ++i) {
beta_new.push_back(0);
- }
+ }
}
else {
gsl_matrix *UtXgamma=
@@ -1020,17 +1020,17 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
}
gsl_matrix_free (UtXgamma);
gsl_vector_free (beta);
- }
-
- logMHratio+=logPost_new-logPost_old;
-
+ }
+
+ logMHratio+=logPost_new-logPost_old;
+
if (logMHratio>0 ||
log(gsl_rng_uniform(gsl_r))<logMHratio) {
accept=1; n_accept++;
}
else {accept=0;}
- if (accept==1) {
+ if (accept==1) {
logPost_old=logPost_new;
rank_old.clear(); beta_old.clear();
if (rank_new.size()!=0) {
@@ -1045,8 +1045,8 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
gsl_vector_memcpy (Utu_old, Utu_new);
}
else {cHyp_new=cHyp_old;}
- }
-
+ }
+
// Calculate z_hat, and pve.
if (a_mode==13) {
time_start=clock();
@@ -1057,21 +1057,21 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
CalcCC_PVEnZ (U, UtXb_old, Utu_old,
z_hat, cHyp_old);
}
-
+
// Sample mu and update z_hat.
gsl_vector_sub (z, z_hat);
mean_z+=CenterVector(z);
mean_z+=
- gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test));
+ gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test));
gsl_vector_add_constant (z_hat, mean_z);
-
+
time_UtZ+=(clock()-time_start)/
(double(CLOCKS_PER_SEC)*60.0);
}
-
+
// Save data.
if (t<w_step) {continue;}
- else {
+ else {
if (t%r_pace==0) {
w_col=w%w_pace;
if (w_col==0) {
@@ -1086,76 +1086,76 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
gsl_matrix_set_zero (Result_gamma);
}
}
-
+
gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h);
gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve);
gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho);
gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge);
gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp);
gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma);
-
+
for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
pos=mapRank2pos[rank_old[i]]+1;
gsl_matrix_set(Result_gamma,w_col,i,
pos);
-
+
beta_g[pos-1].first+=beta_old[i];
- beta_g[pos-1].second+=1.0;
+ beta_g[pos-1].second+=1.0;
}
-
+
gsl_vector_add (alpha_prime, alpha_old);
gsl_vector_add (Utu, Utu_old);
-
+
if (a_mode==13) {
pheno_mean+=mean_z;
}
-
+
w++;
-
+
}
-
+
}
}
cout<<endl;
-
+
w_col=w%w_pace;
- WriteResult (1, Result_hyp, Result_gamma, w_col);
-
+ WriteResult (1, Result_hyp, Result_gamma, w_col);
+
gsl_matrix_free(Result_hyp);
- gsl_matrix_free(Result_gamma);
-
+ gsl_matrix_free(Result_gamma);
+
gsl_vector_free(z_hat);
gsl_vector_free(z);
- gsl_vector_free(Utz);
- gsl_vector_free(UtXb_new);
+ gsl_vector_free(Utz);
+ gsl_vector_free(UtXb_new);
gsl_vector_free(UtXb_old);
- gsl_vector_free(alpha_new);
+ gsl_vector_free(alpha_new);
gsl_vector_free(alpha_old);
- gsl_vector_free(Utu_new);
- gsl_vector_free(Utu_old);
-
- gsl_vector_scale (alpha_prime, 1.0/(double)w);
- gsl_vector_scale (Utu, 1.0/(double)w);
+ gsl_vector_free(Utu_new);
+ gsl_vector_free(Utu_old);
+
+ gsl_vector_scale (alpha_prime, 1.0/(double)w);
+ gsl_vector_scale (Utu, 1.0/(double)w);
if (a_mode==13) {
pheno_mean/=(double)w;
}
-
+
gsl_vector *alpha=gsl_vector_alloc (ns_test);
gsl_blas_dgemv (CblasTrans, 1.0/(double)ns_test, UtX,
- alpha_prime, 0.0, alpha);
+ alpha_prime, 0.0, alpha);
WriteParam (beta_g, alpha, w);
gsl_vector_free(alpha);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime);
- WriteBV(alpha_prime);
-
- gsl_vector_free(alpha_prime);
- gsl_vector_free(Utu);
-
+ WriteBV(alpha_prime);
+
+ gsl_vector_free(alpha_prime);
+ gsl_vector_free(Utu);
+
delete [] p_gamma;
beta_g.clear();
-
+
return;
}
@@ -1169,9 +1169,9 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
gsl_vector_memcpy (H_eval, eval);
gsl_vector_scale (H_eval, lambda);
gsl_vector_add_constant (H_eval, 1.0);
-
+
gsl_vector_memcpy (bv, Uty);
- gsl_vector_div (bv, H_eval);
+ gsl_vector_div (bv, H_eval);
gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2,
UtX, bv, 0.0, beta);
@@ -1181,18 +1181,18 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
WriteParam (beta);
WriteBV(bv);
-
+
gsl_vector_free (H_eval);
gsl_vector_free (beta);
gsl_vector_free (bv);
-
+
return;
}
// Below fits MCMC for rho=1.
void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y,
const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
- time_t time_start=clock();
+ time_t time_start=clock();
gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1,
s_size);
gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
@@ -1271,7 +1271,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
for (size_t i=0; i<rank_union.size(); i++) {
if (mapRank2in_remove.count(rank_old[i_old])!=0) {i_old++; continue;}
- gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
+ gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
gsl_vector_const_view Xcopy_col=gsl_matrix_const_column(X_old, i_old);
gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
@@ -1290,7 +1290,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
j_old++; j_new++;
}
i_old++; i_new++;
- }
+ }
} else {
gsl_matrix *X_add=gsl_matrix_alloc(X_old->size1, rank_add.size() );
gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2);
@@ -1302,7 +1302,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
// Get t(X_add)X_add and t(X_add)X_temp.
clock_t time_start=clock();
-
+
// Somehow the lapack_dgemm does not work here.
gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add,
0.0, XtX_aa);
@@ -1325,15 +1325,15 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
i_flag=0;
}
- gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
+ gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new);
if (i_flag==1) {
gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add);
gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
} else {
gsl_vector_const_view Xcopy_col=
- gsl_matrix_const_column(X_old, i_old);
+ gsl_matrix_const_column(X_old, i_old);
gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector);
- }
+ }
if (i_flag==1) {
d=gsl_vector_get (Xty_add, i_add);
@@ -1385,34 +1385,34 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old,
rank_union.clear();
mapRank2in_remove.clear();
mapRank2in_add.clear();
-
+
return;
}
-double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) {
+double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) {
double logpost=0.0;
-
+
// For quantitative traits, calculate pve and pge.
// Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
if (a_mode==11) {
cHyp.pve=0.0;
- cHyp.pge=1.0;
+ cHyp.pge=1.0;
}
// Calculate likelihood.
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);}
else {logpost-=0.5*yty;}
-
+
logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+
((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp));
-
+
return logpost;
}
double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
const gsl_vector *Xty, const double yty,
const size_t s_size, gsl_vector *Xb,
- gsl_vector *beta, class HYPBSLMM &cHyp) {
+ gsl_vector *beta, class HYPBSLMM &cHyp) {
double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test);
double logpost=0.0;
double d, P_yy=yty, logdet_O=0.0;
@@ -1423,10 +1423,10 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size);
gsl_vector_const_view Xty_sub=
gsl_vector_const_subvector (Xty, 0, s_size);
-
+
gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size);
gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size);
- gsl_vector *beta_hat=gsl_vector_alloc (s_size);
+ gsl_vector *beta_hat=gsl_vector_alloc (s_size);
gsl_vector *Xty_temp=gsl_vector_alloc (s_size);
gsl_vector_memcpy (Xty_temp, &Xty_sub.vector);
@@ -1436,9 +1436,9 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
gsl_matrix_scale (Omega, sigma_a2);
gsl_matrix_set_identity (M_temp);
gsl_matrix_add (Omega, M_temp);
-
+
// Calculate beta_hat.
- logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
+ logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat);
gsl_vector_scale (beta_hat, sigma_a2);
gsl_blas_ddot (Xty_temp, beta_hat, &d);
@@ -1453,27 +1453,27 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
// Sample beta.
for (size_t i=0; i<s_size; i++)
{
- d=gsl_ran_gaussian(gsl_r, 1);
- gsl_vector_set(beta, i, d);
+ d=gsl_ran_gaussian(gsl_r, 1);
+ gsl_vector_set(beta, i, d);
}
gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size);
gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
- &beta_sub.vector);
-
+ &beta_sub.vector);
+
// This computes inv(L^T(Omega)) %*% beta.
gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau));
- gsl_vector_add(&beta_sub.vector, beta_hat);
+ gsl_vector_add(&beta_sub.vector, beta_hat);
gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix,
- &beta_sub.vector, 0.0, Xb);
-
+ &beta_sub.vector, 0.0, Xb);
+
// For quantitative traits, calculate pve and pge.
if (a_mode==11) {
gsl_blas_ddot (Xb, Xb, &d);
cHyp.pve=d/(double)ni_test;
cHyp.pve/=cHyp.pve+1.0/tau;
- cHyp.pge=1.0;
- }
-
+ cHyp.pge=1.0;
+ }
+
logpost=-0.5*logdet_O;
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);}
else {logpost-=0.5*P_yy;}
@@ -1490,11 +1490,11 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX,
}
// Calculate pve and pge, and calculate z_hat for case-control data.
-void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp)
+void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp)
{
gsl_vector_set_zero(z_hat);
cHyp.pve=0.0;
- cHyp.pge=1.0;
+ cHyp.pge=1.0;
return;
}
@@ -1502,12 +1502,12 @@ void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp)
void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat,
class HYPBSLMM &cHyp) {
double d;
-
+
gsl_blas_ddot (Xb, Xb, &d);
cHyp.pve=d/(double)ni_test;
cHyp.pve/=cHyp.pve+1.0;
cHyp.pge=1.0;
-
+
gsl_vector_memcpy (z_hat, Xb);
return;
@@ -1515,16 +1515,16 @@ void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat,
// If a_mode==13, then run probit model.
void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
- clock_t time_start;
+ clock_t time_start;
double time_set=0, time_post=0;
class HYPBSLMM cHyp_old, cHyp_new;
-
+
gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6);
- gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
-
+ gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max);
+
gsl_vector *Xb_new=gsl_vector_alloc (ni_test);
- gsl_vector *Xb_old=gsl_vector_alloc (ni_test);
+ gsl_vector *Xb_old=gsl_vector_alloc (ni_test);
gsl_vector *z_hat=gsl_vector_alloc (ni_test);
gsl_vector *z=gsl_vector_alloc (ni_test);
@@ -1540,28 +1540,28 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
double ztz=0.0;
gsl_vector_memcpy (z, y);
-
+
// For quantitative traits, y is centered already in
// gemma.cpp, but just in case.
- double mean_z=CenterVector (z);
+ double mean_z=CenterVector (z);
gsl_blas_ddot(z, z, &ztz);
double logPost_new, logPost_old;
double logMHratio;
-
+
gsl_matrix_set_zero (Result_gamma);
if (a_mode==13) {
pheno_mean=0.0;
}
-
+
vector<pair<double, double> > beta_g;
for (size_t i=0; i<ns_test; i++) {
beta_g.push_back(make_pair(0.0, 0.0));
}
-
+
vector<size_t> rank_new, rank_old;
vector<pair<size_t, double> > pos_loglr;
-
+
time_start=clock();
MatrixCalcLmLR (X, z, pos_loglr);
time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
@@ -1570,44 +1570,44 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
for (size_t i=0; i<ns_test; ++i) {
mapRank2pos[i]=pos_loglr[i].first;
}
-
+
// Calculate proposal distribution for gamma (unnormalized),
// and set up gsl_r and gsl_t.
- gsl_rng_env_setup();
+ gsl_rng_env_setup();
const gsl_rng_type * gslType;
- gslType = gsl_rng_default;
+ gslType = gsl_rng_default;
if (randseed<0)
{
time_t rawtime;
time (&rawtime);
tm * ptm = gmtime (&rawtime);
-
+
randseed = (unsigned) (ptm->tm_hour%24*3600+
ptm->tm_min*60+ptm->tm_sec);
}
- gsl_r = gsl_rng_alloc(gslType);
+ gsl_r = gsl_rng_alloc(gslType);
gsl_rng_set(gsl_r, randseed);
-
- double *p_gamma = new double[ns_test];
+
+ double *p_gamma = new double[ns_test];
CalcPgamma (p_gamma);
-
+
gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma);
-
+
// Initial parameters.
InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr);
-
+
cHyp_initial=cHyp_old;
- if (cHyp_old.n_gamma==0) {
+ if (cHyp_old.n_gamma==0) {
logPost_old=CalcPosterior (ztz, cHyp_old);
}
- else {
- SetXgamma (Xgamma_old, X, rank_old);
+ else {
+ SetXgamma (Xgamma_old, X, rank_old);
CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
rank_old.size(), Xb_old, beta_old,
cHyp_old);
- }
+ }
// Calculate centered z_hat, and pve.
if (a_mode==13) {
@@ -1618,28 +1618,28 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
}
}
-
+
// Start MCMC.
int accept;
size_t total_step=w_step+s_step;
size_t w=0, w_col, pos;
size_t repeat=0;
-
+
for (size_t t=0; t<total_step; ++t) {
if (t%d_pace==0 || t==total_step-1) {
ProgressBar ("Running MCMC ", t, total_step-1,
(double)n_accept/(double)(t*n_mh+1));
}
- if (a_mode==13) {
- SampleZ (y, z_hat, z);
+ if (a_mode==13) {
+ SampleZ (y, z_hat, z);
mean_z=CenterVector (z);
gsl_blas_ddot(z,z,&ztz);
-
+
// First proposal.
- if (cHyp_old.n_gamma==0) {
+ if (cHyp_old.n_gamma==0) {
logPost_old=CalcPosterior (ztz, cHyp_old);
- } else {
+ } else {
gsl_matrix_view Xold_sub=
gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test,
rank_old.size());
@@ -1651,7 +1651,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz,
rank_old.size(), Xb_old, beta_old,
cHyp_old);
- }
+ }
}
// M-H steps.
@@ -1663,23 +1663,23 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
logMHratio=0.0;
logMHratio+=
- ProposeHnRho(cHyp_old, cHyp_new, repeat);
+ ProposeHnRho(cHyp_old, cHyp_new, repeat);
logMHratio+=
ProposeGamma (rank_old, rank_new, p_gamma,
- cHyp_old, cHyp_new, repeat);
+ cHyp_old, cHyp_new, repeat);
logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat);
-
+
if (cHyp_new.n_gamma==0) {
logPost_new=CalcPosterior (ztz, cHyp_new);
} else {
-
+
// This makes sure that rank_old.size() ==
// rank_remove.size() does not happen.
if (cHyp_new.n_gamma<=20 || cHyp_old.n_gamma<=20) {
time_start=clock();
- SetXgamma (Xgamma_new, X, rank_new);
+ SetXgamma (Xgamma_new, X, rank_new);
CalcXtX (Xgamma_new, z, rank_new.size(),
- XtX_new, Xtz_new);
+ XtX_new, Xtz_new);
time_set+=(clock()-time_start)/
(double(CLOCKS_PER_SEC)*60.0);
} else {
@@ -1697,17 +1697,17 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
cHyp_new);
time_post+=(clock()-time_start)/
(double(CLOCKS_PER_SEC)*60.0);
- }
- logMHratio+=logPost_new-logPost_old;
-
+ }
+ logMHratio+=logPost_new-logPost_old;
+
if (logMHratio>0 ||
log(gsl_rng_uniform(gsl_r))<logMHratio) {
accept=1;
n_accept++;
}
else {accept=0;}
-
- if (accept==1) {
+
+ if (accept==1) {
logPost_old=logPost_new;
cHyp_old=cHyp_new;
gsl_vector_memcpy (Xb_old, Xb_new);
@@ -1719,7 +1719,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
++i) {
rank_old.push_back(rank_new[i]);
}
-
+
gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
gsl_matrix_view XtXold_sub=gsl_matrix_submatrix(XtX_old, 0, 0, rank_new.size(), rank_new.size());
gsl_vector_view Xtzold_sub=gsl_vector_subvector(Xtz_old, 0, rank_new.size());
@@ -1742,8 +1742,8 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
} else {
cHyp_new=cHyp_old;
}
-
- }
+
+ }
// Calculate z_hat, and pve.
if (a_mode==13) {
@@ -1753,19 +1753,19 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
else {
CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old);
}
-
+
// Sample mu and update z_hat.
gsl_vector_sub (z, z_hat);
mean_z+=CenterVector(z);
mean_z+=gsl_ran_gaussian(gsl_r,
sqrt(1.0/(double) ni_test));
-
+
gsl_vector_add_constant (z_hat, mean_z);
}
-
+
// Save data.
if (t<w_step) {continue;}
- else {
+ else {
if (t%r_pace==0) {
w_col=w%w_pace;
if (w_col==0) {
@@ -1793,21 +1793,21 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
cHyp_old.logp);
gsl_matrix_set(Result_hyp,w_col,5,
cHyp_old.n_gamma);
-
+
for (size_t i=0; i<cHyp_old.n_gamma; ++i) {
pos=mapRank2pos[rank_old[i]]+1;
gsl_matrix_set(Result_gamma,w_col,
i,pos);
-
+
beta_g[pos-1].first+=
gsl_vector_get(beta_old, i);
- beta_g[pos-1].second+=1.0;
+ beta_g[pos-1].second+=1.0;
}
-
+
if (a_mode==13) {
pheno_mean+=mean_z;
}
-
+
w++;
}
}
@@ -1818,19 +1818,19 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
cout<<"time on calculating posterior: "<<time_post<<endl;
w_col=w%w_pace;
- WriteResult (1, Result_hyp, Result_gamma, w_col);
-
+ WriteResult (1, Result_hyp, Result_gamma, w_col);
+
gsl_vector *alpha=gsl_vector_alloc (ns_test);
gsl_vector_set_zero (alpha);
WriteParam (beta_g, alpha, w);
gsl_vector_free(alpha);
gsl_matrix_free(Result_hyp);
- gsl_matrix_free(Result_gamma);
-
+ gsl_matrix_free(Result_gamma);
+
gsl_vector_free(z_hat);
gsl_vector_free(z);
- gsl_vector_free(Xb_new);
+ gsl_vector_free(Xb_new);
gsl_vector_free(Xb_old);
gsl_matrix_free(Xgamma_old);
@@ -1842,9 +1842,9 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) {
gsl_matrix_free(XtX_new);
gsl_vector_free(Xtz_new);
gsl_vector_free(beta_new);
-
+
delete [] p_gamma;
beta_g.clear();
-
+
return;
}
diff --git a/src/bslmm.h b/src/bslmm.h
index da185fa..c7768a2 100644
--- a/src/bslmm.h
+++ b/src/bslmm.h
@@ -1,22 +1,22 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __BSLMM_H__
+#ifndef __BSLMM_H__
#define __BSLMM_H__
#include <vector>
@@ -30,23 +30,23 @@ using namespace std;
class BSLMM {
-public:
+public:
// IO-related parameters.
- int a_mode;
+ int a_mode;
size_t d_pace;
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
// LMM-related parameters.
double l_min;
double l_max;
size_t n_region;
double pve_null;
double pheno_mean;
-
+
// BSLMM MCMC-related parameters
double h_min, h_max, h_scale; // Priors for h.
double rho_min, rho_max, rho_scale; // Priors for rho.
@@ -61,8 +61,8 @@ public:
size_t n_mh; // Number of MH steps per iter.
double geo_mean; // Mean of geometric dist.
long int randseed;
- double trace_G;
-
+ double trace_G;
+
HYPBSLMM cHyp_initial;
// Summary statistics.
@@ -74,32 +74,32 @@ public:
// Time spent on constructing the proposal distribution for
// gamma (i.e. lmm or lm analysis).
- double time_Proposal;
+ double time_Proposal;
// Indicator for individuals (phenotypes): 0 missing, 1
// available for analysis.
- vector<int> indicator_idv;
+ vector<int> indicator_idv;
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
+ vector<int> indicator_snp;
// Record SNP information.
- vector<SNPINFO> snpInfo;
-
+ vector<SNPINFO> snpInfo;
+
// Not included in PARAM.
- gsl_rng *gsl_r;
- gsl_ran_discrete_t *gsl_t;
- map<size_t, size_t> mapRank2pos;
-
+ gsl_rng *gsl_r;
+ gsl_ran_discrete_t *gsl_t;
+ map<size_t, size_t> mapRank2pos;
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
-
+
void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
const gsl_vector *Uty, const gsl_vector *eval,
const double lambda);
-
+
void MCMC (const gsl_matrix *U, const gsl_matrix *UtX,
const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y);
@@ -111,10 +111,10 @@ public:
void WriteParam (const gsl_vector *alpha);
void WriteResult (const int flag, const gsl_matrix *Result_hyp,
const gsl_matrix *Result_gamma, const size_t w_col);
-
+
// Subfunctions inside MCMC.
void CalcPgamma (double *p_gammar);
-
+
double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty,
const double sigma_a2);
void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty,
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index ebbff70..e1a53a6 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -92,13 +92,13 @@ void BSLMMDAP::CopyToParam (PARAM &cPar) {
// Read hyp file.
-void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
+void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
vector<double> &vec_sb2, vector<double> &vec_wab) {
vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear();
igzstream infile (file_hyp.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open hyp file: "<<file_hyp<<endl;
+ cout<<"error! fail to open hyp file: "<<file_hyp<<endl;
return;
}
@@ -128,7 +128,7 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
}
// Read bf file.
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
+void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
vector<vector<vector<double> > > &BF) {
BF.clear(); vec_rs.clear();
@@ -196,12 +196,12 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
// Read category files.
// Read both continuous and discrete category file, record mapRS2catc.
-void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
+void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
size_t &kc, size_t &kd) {
igzstream infile (file_cat.c_str(), igzstream::in);
if (!infile) {
- cout<<"error! fail to open category file: "<<file_cat<<endl;
+ cout<<"error! fail to open category file: "<<file_cat<<endl;
return;
}
@@ -323,11 +323,11 @@ void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) {
outfile_hyp.open (file_hyp.c_str(), ofstream::out);
if (!outfile_bf) {
- cout<<"error writing file: "<<file_bf<<endl;
+ cout<<"error writing file: "<<file_bf<<endl;
return;
}
if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
+ cout<<"error writing file: "<<file_hyp<<endl;
return;
}
@@ -370,8 +370,8 @@ void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) {
return;
}
-void BSLMMDAP::WriteResult (const vector<string> &vec_rs,
- const gsl_matrix *Hyper, const gsl_vector *pip,
+void BSLMMDAP::WriteResult (const vector<string> &vec_rs,
+ const gsl_matrix *Hyper, const gsl_vector *pip,
const gsl_vector *coef) {
string file_gamma, file_hyp, file_coef;
file_gamma=path_out+"/"+file_out;
@@ -388,15 +388,15 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs,
outfile_coef.open (file_coef.c_str(), ofstream::out);
if (!outfile_gamma) {
- cout<<"error writing file: "<<file_gamma<<endl;
+ cout<<"error writing file: "<<file_gamma<<endl;
return;
}
if (!outfile_hyp) {
- cout<<"error writing file: "<<file_hyp<<endl;
+ cout<<"error writing file: "<<file_hyp<<endl;
return;
}
if (!outfile_coef) {
- cout<<"error writing file: "<<file_coef<<endl;
+ cout<<"error writing file: "<<file_coef<<endl;
return;
}
@@ -432,8 +432,8 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs,
}
-double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
- const gsl_vector *K_eval,
+double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
+ const gsl_vector *K_eval,
const double sigma_b2, const double tau) {
gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size);
@@ -457,16 +457,16 @@ double BSLMMDAP::CalcMarginal (const gsl_vector *Uty,
return logm;
}
-double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty,
- const gsl_vector *K_eval,
- const double sigma_a2,
+double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma,
+ const gsl_vector *Uty,
+ const gsl_vector *K_eval,
+ const double sigma_a2,
const double sigma_b2, const double tau) {
clock_t time_start;
double logm=0.0;
double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0;
- gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
+ gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1,
UtXgamma->size2);
gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2);
gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2);
@@ -526,8 +526,8 @@ double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) {
}
// Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
+void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y) {
clock_t time_start;
@@ -601,9 +601,9 @@ void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
return;
}
-void single_ct_regression(const gsl_matrix_int *Xd,
+void single_ct_regression(const gsl_matrix_int *Xd,
const gsl_vector_int *dlevel,
- const gsl_vector *pip_vec,
+ const gsl_vector *pip_vec,
gsl_vector *coef, gsl_vector *prior_vec) {
map<int,double> sum_pip;
@@ -635,13 +635,13 @@ void single_ct_regression(const gsl_matrix_int *Xd,
}
// Where A is the ni_test by n_cat matrix of annotations.
-void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd,
- const vector<string> &vec_rs,
- const vector<double> &vec_sa2,
- const vector<double> &vec_sb2,
- const vector<double> &wab,
- const vector<vector<vector<double> > > &BF,
- gsl_matrix *Ac, gsl_matrix_int *Ad,
+void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd,
+ const vector<string> &vec_rs,
+ const vector<double> &vec_sa2,
+ const vector<double> &vec_sb2,
+ const vector<double> &wab,
+ const vector<vector<vector<double> > > &BF,
+ gsl_matrix *Ac, gsl_matrix_int *Ad,
gsl_vector_int *dlevel) {
clock_t time_start;
diff --git a/src/bslmmdap.h b/src/bslmmdap.h
index 8445669..db5774b 100644
--- a/src/bslmmdap.h
+++ b/src/bslmmdap.h
@@ -78,35 +78,35 @@ public:
void CopyToParam (PARAM &cPar);
void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF);
- void WriteResult (const vector<string> &vec_rs,
- const gsl_matrix *Hyper, const gsl_vector *pip,
+ void WriteResult (const vector<string> &vec_rs,
+ const gsl_matrix *Hyper, const gsl_vector *pip,
const gsl_vector *coef);
- double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval,
+ double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval,
const double sigma_b2, const double tau);
- double CalcMarginal (const gsl_matrix *UtXgamma,
- const gsl_vector *Uty, const gsl_vector *K_eval,
- const double sigma_a2, const double sigma_b2,
+ double CalcMarginal (const gsl_matrix *UtXgamma,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
+ const double sigma_a2, const double sigma_b2,
const double tau);
double CalcPrior (class HYPBSLMM &cHyp);
- void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
- const gsl_vector *Uty, const gsl_vector *K_eval,
+ void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX,
+ const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y);
- void DAP_EstimateHyper (const size_t kc, const size_t kd,
- const vector<string> &vec_rs,
- const vector<double> &vec_sa2,
- const vector<double> &vec_sb2,
- const vector<double> &wab,
- const vector<vector<vector<double> > > &BF,
- gsl_matrix *Ac, gsl_matrix_int *Ad,
+ void DAP_EstimateHyper (const size_t kc, const size_t kd,
+ const vector<string> &vec_rs,
+ const vector<double> &vec_sa2,
+ const vector<double> &vec_sb2,
+ const vector<double> &wab,
+ const vector<vector<vector<double> > > &BF,
+ gsl_matrix *Ac, gsl_matrix_int *Ad,
gsl_vector_int *dlevel);
};
-void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
+void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2,
vector<double> &vec_sb2, vector<double> &vec_wab);
-void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
+void ReadFile_bf (const string &file_bf, vector<string> &vec_rs,
vector<vector<vector<double> > > &BF);
-void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
+void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs,
gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel,
size_t &kc, size_t &kd);
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index 7ad250f..733dae1 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -28,9 +28,9 @@ using namespace std;
using namespace Eigen;
// On two different clusters, compare eigen vs lapack/gsl:
-//
+//
// dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl
-// dgemv, 20x or 4x faster than gsl,
+// dgemv, 20x or 4x faster than gsl,
// eigen, 1x or 0.3x slower than lapack
// invert, 20x or 10x faster than lapack
//
diff --git a/src/eigenlib.h b/src/eigenlib.h
index 8cb8880..3659dc1 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __EIGENLIB_H__
+#ifndef __EIGENLIB_H__
#define __EIGENLIB_H__
#include <vector>
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 18179f3..f4902fe 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -31,7 +31,7 @@
#include "gsl/gsl_eigen.h"
#include "gsl/gsl_cdf.h"
-#include "lapack.h"
+#include "lapack.h"
#include "io.h"
#include "gemma.h"
#include "vc.h"
@@ -211,7 +211,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl;
cout<<endl;
}
-
+
if (option==2) {
cout<<" FILE I/O RELATED OPTIONS" << endl;
cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl;
@@ -231,11 +231,11 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" format: rs#1, base_position, chr_number"<<endl;
cout<<" rs#2, base_position, chr_number"<<endl;
cout<<" ..."<<endl;
-
+
// WJA added.
cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl;
cout<<" requires: *.bgen, *.sample files"<<endl;
-
+
cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl;
cout<<" format: variable for individual 1"<<endl;
cout<<" variable for individual 2"<<endl;
@@ -308,7 +308,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl;
cout<<endl;
}
-
+
if (option==4) {
cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl;
@@ -317,13 +317,13 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" note: non-polymorphic SNPs are excluded "<<endl;
cout<<endl;
}
-
+
if (option==5) {
cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl;
cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl;
cout<<endl;
}
-
+
if (option==6) {
cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl;
cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl;
@@ -336,7 +336,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl;
cout<<endl;
}
-
+
if (option==7) {
cout<<" LINEAR MODEL OPTIONS" << endl;
cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl;
@@ -346,7 +346,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" 4: 1-3"<<endl;
cout<<endl;
}
-
+
if (option==8) {
cout<<" LINEAR MIXED MODEL OPTIONS" << endl;
cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl;
@@ -360,7 +360,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl;
cout<<endl;
}
-
+
if (option==9) {
cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl;
@@ -371,7 +371,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl;
cout<<endl;
}
-
+
if (option==10) {
cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl;
cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl;
@@ -380,10 +380,10 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl;
cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl;
cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl;
-
+
cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl;
cout<<" options: 1: LDR"<<endl;
-
+
cout<<" MCMC OPTIONS" << endl;
cout<<" Prior" << endl;
cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl;
@@ -394,13 +394,13 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl;
cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl;
cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl;
-
+
cout<<" Proposal" << endl;
cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl;
cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl;
cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl;
-
+
cout<<" Others" << endl;
cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl;
cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl;
@@ -411,7 +411,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl;
cout<<endl;
}
-
+
if (option==11) {
cout<<" PREDICTION OPTIONS" << endl;
cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl;
@@ -419,7 +419,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl;
cout<<endl;
}
-
+
if (option==12) {
cout<<" CALC CORRELATION OPTIONS" << endl;
cout<<" -calccor "<<endl;
@@ -428,7 +428,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl;
cout<<endl;
}
-
+
if (option==13) {
cout<<" NOTE"<<endl;
cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl;
@@ -438,7 +438,7 @@ void GEMMA::PrintHelp(size_t option) {
cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl;
cout<<endl;
}
-
+
return;
}
@@ -522,7 +522,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) {
str.assign(argv[i]);
cPar.file_anno=str;
}
-
+
// WJA added.
else if (strcmp(argv[i], "-oxford")==0 ||
strcmp(argv[i], "--oxford")==0 ||
@@ -1262,7 +1262,7 @@ void GEMMA::BatchRun (PARAM &cPar) {
gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph);
gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt);
-
+
//set covariates matrix W and phenotype matrix Y
//an intercept should be included in W,
cPar.CopyCvtPhen (W, Y, 0);
@@ -1545,7 +1545,7 @@ void GEMMA::BatchRun (PARAM &cPar) {
cVarcov.CopyToParam(cPar);
}
-
+
// LM.
if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM
gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph);
@@ -2581,7 +2581,7 @@ void GEMMA::BatchRun (PARAM &cPar) {
} else {
kc=0; kd=0;
}
-
+
cout<<"## number of blocks = "<<BF.size()<<endl;
cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl;
cout<<"## grid size for hyperparameters = "<<wab.size()<<endl;
diff --git a/src/gemma.h b/src/gemma.h
index 8393470..78828ef 100644
--- a/src/gemma.h
+++ b/src/gemma.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __GEMMA_H__
+#ifndef __GEMMA_H__
#define __GEMMA_H__
#include "param.h"
@@ -25,15 +25,15 @@ using namespace std;
class GEMMA {
-public:
+public:
// Parameters.
string version;
string date;
string year;
-
+
// Constructor.
GEMMA(void);
-
+
// Functions.
void PrintHeader (void);
void PrintHelp (size_t option);
diff --git a/src/gzstream.cpp b/src/gzstream.cpp
index bbb4ba8..688b625 100644
--- a/src/gzstream.cpp
+++ b/src/gzstream.cpp
@@ -21,8 +21,8 @@
// Revision : $Revision: 1.7 $
// Revision_date : $Date: 2003/01/08 14:41:27 $
// Author(s) : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
// Standard C++ Library".
// ============================================================================
@@ -97,7 +97,7 @@ int gzstreambuf::underflow() { // used for input buffer only
buffer + 4 + num); // end of buffer
// return next character
- return * reinterpret_cast<unsigned char *>( gptr());
+ return * reinterpret_cast<unsigned char *>( gptr());
}
int gzstreambuf::flush_buffer() {
diff --git a/src/gzstream.h b/src/gzstream.h
index 4e86ad9..241ff76 100644
--- a/src/gzstream.h
+++ b/src/gzstream.h
@@ -21,8 +21,8 @@
// Revision : $Revision: 1.5 $
// Revision_date : $Date: 2002/04/26 23:30:15 $
// Author(s) : Deepak Bandyopadhyay, Lutz Kettner
-//
-// Standard streambuf implementation following Nicolai Josuttis, "The
+//
+// Standard streambuf implementation following Nicolai Josuttis, "The
// Standard C++ Library".
// ============================================================================
@@ -58,14 +58,14 @@ public:
setp( buffer, buffer + (bufferSize-1));
setg( buffer + 4, // beginning of putback area
buffer + 4, // read position
- buffer + 4); // end position
+ buffer + 4); // end position
// ASSERT: both input & output capabilities will not be used together
}
int is_open() { return opened; }
gzstreambuf* open( const char* name, int open_mode);
gzstreambuf* close();
~gzstreambuf() { close(); }
-
+
virtual int overflow( int c = EOF);
virtual int underflow();
virtual int sync();
@@ -85,15 +85,15 @@ public:
// ----------------------------------------------------------------------------
// User classes. Use igzstream and ogzstream analogously to ifstream and
-// ofstream respectively. They read and write files based on the gz*
+// ofstream respectively. They read and write files based on the gz*
// function interface of the zlib. Files are compatible with gzip compression.
// ----------------------------------------------------------------------------
class igzstream : public gzstreambase, public std::istream {
public:
- igzstream() : std::istream( &buf) {}
+ igzstream() : std::istream( &buf) {}
igzstream( const char* name, int open_mode = std::ios::in)
- : gzstreambase( name, open_mode), std::istream( &buf) {}
+ : gzstreambase( name, open_mode), std::istream( &buf) {}
gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
void open( const char* name, int open_mode = std::ios::in) {
gzstreambase::open( name, open_mode);
@@ -104,7 +104,7 @@ class ogzstream : public gzstreambase, public std::ostream {
public:
ogzstream() : std::ostream( &buf) {}
ogzstream( const char* name, int mode = std::ios::out)
- : gzstreambase( name, mode), std::ostream( &buf) {}
+ : gzstreambase( name, mode), std::ostream( &buf) {}
gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); }
void open( const char* name, int open_mode = std::ios::out) {
gzstreambase::open( name, open_mode);
diff --git a/src/lapack.h b/src/lapack.h
index 88fc509..5e1db35 100644
--- a/src/lapack.h
+++ b/src/lapack.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __LAPACK_H__
+#ifndef __LAPACK_H__
#define __LAPACK_H__
#include <vector>
diff --git a/src/ldr.cpp b/src/ldr.cpp
index a1e5791..f0a1b37 100644
--- a/src/ldr.cpp
+++ b/src/ldr.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -24,7 +24,7 @@
#include <cmath>
#include <iostream>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <ctime>
#include <cstring>
#include <algorithm>
@@ -51,7 +51,7 @@ using namespace Eigen;
void LDR::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
@@ -62,11 +62,11 @@ void LDR::CopyFromParam (PARAM &cPar) {
ni_test=cPar.ni_test;
ns_test=cPar.ns_test;
n_cvt=cPar.n_cvt;
-
+
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
@@ -77,7 +77,7 @@ void LDR::CopyToParam (PARAM &cPar) {
//X is a p by n matrix.
void LDR::VB (const vector<vector<unsigned char> > &Xt,
const gsl_matrix *W_gsl, const gsl_vector *y_gsl) {
-
+
// Save gsl_vector and gsl_matrix into Eigen library formats.
MatrixXd W(W_gsl->size1, W_gsl->size2);
VectorXd y(y_gsl->size);
@@ -105,6 +105,6 @@ void LDR::VB (const vector<vector<unsigned char> > &Xt,
// Save results.
// TO DO.
-
+
return;
}
diff --git a/src/ldr.h b/src/ldr.h
index ceb08cf..ab55fe2 100644
--- a/src/ldr.h
+++ b/src/ldr.h
@@ -1,22 +1,22 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __LDR_H__
+#ifndef __LDR_H__
#define __LDR_H__
#include <vector>
@@ -29,16 +29,16 @@ using namespace std;
class LDR {
-public:
+public:
// IO-related parameters.
- int a_mode;
+ int a_mode;
size_t d_pace;
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
// Summary statistics.
size_t ni_total, ns_total; // Total number of individuals & SNPs.
size_t ni_test, ns_test; // Number of individuals & SNPs used
@@ -52,16 +52,16 @@ public:
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
vector<int> indicator_snp;
-
+
vector<SNPINFO> snpInfo; // Record SNP information.
-
+
// Not included in PARAM.
- gsl_rng *gsl_r;
-
+ gsl_rng *gsl_r;
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
-
+
void VB(const vector<vector<unsigned char> > &Xt,
const gsl_matrix *W_gsl, const gsl_vector *y_gsl);
};
diff --git a/src/lm.cpp b/src/lm.cpp
index d3ad7f3..94729db 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -412,13 +412,13 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) {
string chr;
std::cout << "Warning: WJA hard coded SNP missingness " <<
"threshold of 10%" << std::endl;
-
+
// Start reading genotypes and analyze.
for (size_t t=0; t<indicator_snp.size(); ++t) {
if (t%d_pace==0 || t==(ns_total-1)) {
ProgressBar ("Reading SNPs ", t, ns_total-1);
}
-
+
// Read SNP header.
id.clear();
rs.clear();
@@ -500,7 +500,7 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) {
static_cast<double>(unzipped_data[i*3+1])/32768.0;
bgen_geno_prob_BB=
static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
+
// WJA
bgen_geno_prob_non_miss=
bgen_geno_prob_AA +
diff --git a/src/lm.h b/src/lm.h
index fac84e1..cf428f0 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -54,7 +54,7 @@ public:
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
- vector<int> indicator_snp;
+ vector<int> indicator_snp;
vector<SNPINFO> snpInfo; // Record SNP information.
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 73a9232..2b5ca84 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -55,7 +55,7 @@ void LMM::CopyFromParam (PARAM &cPar) {
file_out=cPar.file_out;
path_out=cPar.path_out;
file_gene=cPar.file_gene;
-
+
// WJA added.
file_oxford=cPar.file_oxford;
@@ -228,12 +228,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode,
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps_ab=gsl_matrix_get (Pab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
ps_ww=gsl_matrix_get (Pab, p-1, index_ww);
-
+
p_ab=ps_ab-ps_aw*ps_bw/ps_ww;
gsl_matrix_set (Pab, p, index_ab, p_ab);
}
@@ -269,7 +269,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode,
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps2_ab=gsl_matrix_get (PPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -277,7 +277,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode,
ps2_aw=gsl_matrix_get (PPab, p-1, index_aw);
ps2_bw=gsl_matrix_get (PPab, p-1, index_bw);
ps2_ww=gsl_matrix_get (PPab, p-1, index_ww);
-
+
p2_ab=ps2_ab+ps_aw*ps_bw*
ps2_ww/(ps_ww*ps_ww);
p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww;
@@ -318,7 +318,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode,
index_aw=GetabIndex (a, p, n_cvt);
index_bw=GetabIndex (b, p, n_cvt);
index_ww=GetabIndex (p, p, n_cvt);
-
+
ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab);
ps_aw=gsl_matrix_get (Pab, p-1, index_aw);
ps_bw=gsl_matrix_get (Pab, p-1, index_bw);
@@ -337,7 +337,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode,
p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw*
ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/
(ps_ww*ps_ww);
-
+
gsl_matrix_set (PPPab, p, index_ab, p3_ab);
}
}
@@ -1479,7 +1479,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
break;
}
@@ -1487,7 +1487,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
ci_total++;
continue;
}
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {
gsl_vector_set(x, ci_test, 2);
@@ -1499,7 +1499,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval,
if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
-
+
ci_total++;
ci_test++;
}
@@ -1678,7 +1678,7 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
ProgressBar ("Reading SNPs ", t, ns_total-1);
}
if (indicator_snp[t]==0) {continue;}
-
+
// Read SNP header.
id.clear();
rs.clear();
@@ -1752,14 +1752,14 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<bgen_N; ++i) {
if (indicator_idv[i]==0) {continue;}
-
+
bgen_geno_prob_AA=
static_cast<double>(unzipped_data[i*3])/32768.0;
bgen_geno_prob_AB=
static_cast<double>(unzipped_data[i*3+1])/32768.0;
bgen_geno_prob_BB=
static_cast<double>(unzipped_data[i*3+2])/32768.0;
-
+
// WJA.
bgen_geno_prob_non_miss = bgen_geno_prob_AA +
bgen_geno_prob_AB+bgen_geno_prob_BB;
@@ -1768,13 +1768,13 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval,
n_miss++;
}
else {
-
+
bgen_geno_prob_AA/=bgen_geno_prob_non_miss;
bgen_geno_prob_AB/=bgen_geno_prob_non_miss;
bgen_geno_prob_BB/=bgen_geno_prob_non_miss;
-
+
geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB;
-
+
gsl_vector_set(x, c_phen, geno);
gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
@@ -1962,7 +1962,7 @@ void CalcLambda (const char func_name, FUNC_PARAM &params,
}
}
else {
-
+
// If derivates change signs.
int status;
int iter=0, max_iter=100;
@@ -2010,11 +2010,11 @@ void CalcLambda (const char func_name, FUNC_PARAM &params,
status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1);
}
while (status==GSL_CONTINUE && iter<max_iter);
-
+
iter=0;
-
+
gsl_root_fdfsolver_set (s_fdf, &FDF, l);
-
+
do {
iter++;
status=gsl_root_fdfsolver_iterate (s_fdf);
@@ -2034,7 +2034,7 @@ void CalcLambda (const char func_name, FUNC_PARAM &params,
} else {
logf_l=LogL_f (l, &params);
}
-
+
if (i==0) {logf=logf_l; lambda=l;}
else if (logf<logf_l) {logf=logf_l; lambda=l;}
else {}
@@ -2228,7 +2228,7 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval,
gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
gsl_vector_view UtW_expand_x=
gsl_matrix_column(UtW_expand, UtW->size2+1);
-
+
// Start reading genotypes and analyze.
for (size_t t=0; t<indicator_snp.size(); ++t) {
!safeGetline(infile, line).eof();
@@ -2400,7 +2400,7 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==(int)ni_total) {
break;
}
@@ -2408,7 +2408,7 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
ci_total++;
continue;
}
-
+
if (b[2*j]==0) {
if (b[2*j+1]==0) {
gsl_vector_set(x, ci_test, 2);
@@ -2420,7 +2420,7 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval,
if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }
else {gsl_vector_set(x, ci_test, -9); n_miss++; }
}
-
+
ci_total++;
ci_test++;
}
diff --git a/src/lmm.h b/src/lmm.h
index 1e88cec..9c3de9d 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -1,4 +1,4 @@
-/*
+/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
@@ -71,7 +71,7 @@ public:
// Indicator for individuals (phenotypes): 0 missing, 1
// available for analysis.
- vector<int> indicator_idv;
+ vector<int> indicator_idv;
// Sequence indicator for SNPs: 0 ignored because of (a) maf,
// (b) miss, (c) non-poly; 1 available for analysis.
diff --git a/src/logistic.h b/src/logistic.h
index 7f9e133..b61ab14 100644
--- a/src/logistic.h
+++ b/src/logistic.h
@@ -1,75 +1,75 @@
-#ifndef LOGISTIC_H_
-#define LOGISTIC_H_
-
-// Mixed interface.
-void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with num. categories.
- gsl_matrix *Xc, // Continuous covariates matrix
- // Nobs x Kc
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
-
-int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_matrix *Xc, // Continuous covariates
- // matrix Nobs x Kc
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Reg. L1 0.0 if not used.
- double lambdaL2); // Reg. L2 0.0 if not used.
-
-double fLogit_mixed(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-// Categorical-only interface.
-void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
-
-int logistic_cat_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K .
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used
- double lambdaL2); // Regularization L2, 0 if not used
-
-double fLogit_cat(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-// Continuous-only interface.
-void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k-1) + Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *yhat);// Vector of prob. predicted
- // by the logistic.
-
-int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used.
- double lambdaL2); // Regularization L2, 0 if not used.
-
-double fLogit_cont(gsl_vector *beta,
- gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-#endif
+#ifndef LOGISTIC_H_
+#define LOGISTIC_H_
+
+// Mixed interface.
+void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with num. categories.
+ gsl_matrix *Xc, // Continuous covariates matrix
+ // Nobs x Kc
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
+
+int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_matrix *Xc, // Continuous covariates
+ // matrix Nobs x Kc
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Reg. L1 0.0 if not used.
+ double lambdaL2); // Reg. L2 0.0 if not used.
+
+double fLogit_mixed(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
+
+// Categorical-only interface.
+void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
+
+int logistic_cat_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K .
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Regularization L1, 0 if not used
+ double lambdaL2); // Regularization L2, 0 if not used
+
+double fLogit_cat(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
+
+// Continuous-only interface.
+void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1 + Sum_k(C_k-1) + Kc.
+ gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
+ gsl_vector *yhat);// Vector of prob. predicted
+ // by the logistic.
+
+int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Regularization L1, 0 if not used.
+ double lambdaL2); // Regularization L2, 0 if not used.
+
+double fLogit_cont(gsl_vector *beta,
+ gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
index b7ac884..c7f0573 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -25,12 +25,12 @@
using namespace std;
-int main(int argc, char * argv[]) {
- GEMMA cGemma;
+int main(int argc, char * argv[]) {
+ GEMMA cGemma;
PARAM cPar;
if (argc <= 1) {
- cGemma.PrintHeader();
+ cGemma.PrintHeader();
return EXIT_SUCCESS;
}
if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'h') {
@@ -46,27 +46,27 @@ int main(int argc, char * argv[]) {
if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'l') {
cGemma.PrintLicense();
return EXIT_SUCCESS;
- }
-
- cGemma.Assign(argc, argv, cPar);
+ }
+
+ cGemma.Assign(argc, argv, cPar);
ifstream check_dir((cPar.path_out).c_str());
if (!check_dir) {
mkdir((cPar.path_out).c_str(), S_IRWXU|S_IRGRP|S_IROTH);
- }
-
+ }
+
if (cPar.error==true) {return EXIT_FAILURE;}
-
+
if (cPar.mode_silence) {stringstream ss; cout.rdbuf (ss.rdbuf());}
-
+
cPar.CheckParam();
-
+
if (cPar.error==true) {return EXIT_FAILURE;}
-
+
cGemma.BatchRun(cPar);
-
+
if (cPar.error==true) {return EXIT_FAILURE;}
-
+
cGemma.WriteLog(argc, argv, cPar);
-
+
return EXIT_SUCCESS; }
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index c09b587..709bdde 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -375,7 +375,7 @@ double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab) {
het_probs[i] /= sum;
double p_hwe = 0.0;
-
+
// p-value calculation for p_hwe.
for (i = 0; i <= rare_copies; i++)
{
diff --git a/src/param.cpp b/src/param.cpp
index a56f5eb..413d517 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -66,7 +66,7 @@ time_UtZ(0.0), time_opt(0.0), time_Omega(0.0)
// Read files: obtain ns_total, ng_total, ns_test, ni_test.
void PARAM::ReadFiles (void) {
string file_str;
-
+
// Read cat file.
if (!file_mcat.empty()) {
if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;}
@@ -216,7 +216,7 @@ void PARAM::ReadFiles (void) {
// If both fam file and pheno files are used, use
// phenotypes inside the pheno file.
if (!file_pheno.empty()) {
-
+
// Phenotype file before genotype file.
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
p_column)==false) {error=true;}
@@ -247,7 +247,7 @@ void PARAM::ReadFiles (void) {
// Read genotype and phenotype file for BIMBAM format.
if (!file_geno.empty()) {
-
+
// Annotation file before genotype file.
if (!file_anno.empty() ) {
if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
@@ -297,11 +297,11 @@ void PARAM::ReadFiles (void) {
if (ReadFile_bim (file_str, snpInfo)==false) {error=true;}
if (t==0) {
-
+
// If both fam file and pheno files are used, use
// phenotypes inside the pheno file.
if (!file_pheno.empty()) {
-
+
// Phenotype file before genotype file.
if (ReadFile_pheno (file_pheno, indicator_pheno, pheno,
p_column)==false) {
@@ -347,7 +347,7 @@ void PARAM::ReadFiles (void) {
// Read genotype and phenotype file for multiple BIMBAM files.
if (!file_mgeno.empty()) {
-
+
// Annotation file before genotype file.
if (!file_anno.empty() ) {
if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp,
@@ -788,7 +788,7 @@ void PARAM::CheckParam (void) {
if (!file_bfile.empty()) {flag++;}
if (!file_geno.empty()) {flag++;}
if (!file_gene.empty()) {flag++;}
-
+
// WJA added.
if (!file_oxford.empty()) {flag++;}
@@ -982,7 +982,7 @@ void PARAM::CheckData (void) {
// WJA NOTE: I added this condition so that covariates can be added
// through sample, probably not exactly what is wanted.
- if(file_oxford.empty())
+ if(file_oxford.empty())
{
if ((file_cvt).empty() || (indicator_cvt).size()==0) {
n_cvt=1;
@@ -1017,7 +1017,7 @@ void PARAM::CheckData (void) {
"the number of individuals. "<<endl;
return;
}
-
+
if ( (indicator_read).size()!=0 &&
(indicator_read).size()!=(indicator_idv).size()) {
error=true;
@@ -1025,7 +1025,7 @@ void PARAM::CheckData (void) {
"match the number of individuals. "<<endl;
return;
}
-
+
// Calculate ni_total and ni_test, and set indicator_idv to 0
// whenever indicator_cvt=0, and calculate np_obs and np_miss.
ni_total=(indicator_idv).size();
@@ -1896,7 +1896,7 @@ void PARAM::CheckCvt () {
// Post-process phentoypes and covariates.
void PARAM::ProcessCvtPhen () {
-
+
// Convert indicator_pheno to indicator_idv.
int k=1;
indicator_idv.clear();
@@ -1949,7 +1949,7 @@ void PARAM::ProcessCvtPhen () {
cout<<"error! number of subsamples is less than number of"<<
"analyzed individuals. "<<endl;
} else {
-
+
// Set up random environment.
gsl_rng_env_setup();
gsl_rng *gsl_r;
diff --git a/src/param.h b/src/param.h
index 9707790..f58da53 100644
--- a/src/param.h
+++ b/src/param.h
@@ -141,7 +141,7 @@ public:
string file_read; // File containing total number of reads.
string file_gene; // Gene expression file.
string file_snps; // File containing analyzed SNPs or genes.
-
+
// WJA added.
string file_oxford;
@@ -212,7 +212,7 @@ public:
// Summary statistics.
bool error;
-
+
// Number of individuals.
size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
@@ -221,7 +221,7 @@ public:
// Number of SNPs.
size_t ns_total, ns_test, ns_study, ns_ref;
-
+
size_t ng_total, ng_test; // Number of genes.
size_t ni_control, ni_case; // Number of controls and number of cases.
size_t ni_subsample; // Number of subsampled individuals.
diff --git a/src/prdt.cpp b/src/prdt.cpp
index db0fa14..b29d150 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -1,17 +1,17 @@
/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
-
+
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
-
+
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
@@ -24,7 +24,7 @@
#include <bitset>
#include <vector>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include <cmath>
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
@@ -43,36 +43,36 @@ using namespace std;
void PRDT::CopyFromParam (PARAM &cPar) {
a_mode=cPar.a_mode;
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
- indicator_pheno=cPar.indicator_pheno;
+
+ indicator_pheno=cPar.indicator_pheno;
indicator_cvt=cPar.indicator_cvt;
indicator_idv=cPar.indicator_idv;
-
+
snpInfo=cPar.snpInfo;
mapRS2est=cPar.mapRS2est;
-
+
time_eigen=0;
-
+
n_ph=cPar.n_ph;
np_obs=cPar.np_obs;
np_miss=cPar.np_miss;
ns_total=cPar.ns_total;
- ns_test=0;
-
+ ns_test=0;
+
return;
}
void PRDT::CopyToParam (PARAM &cPar) {
cPar.ns_test=ns_test;
cPar.time_eigen=time_eigen;
-
+
return;
-}
+}
void PRDT::WriteFiles (gsl_vector *y_prdt) {
string file_str;
@@ -80,13 +80,13 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) {
file_str+=".";
file_str+="prdt";
file_str+=".txt";
-
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
cout<<"error writing file: "<<file_str.c_str()<<endl;
return;
}
-
+
size_t ci_test=0;
for (size_t i=0; i<indicator_idv.size(); i++) {
if (indicator_idv[i]==1) {
@@ -96,7 +96,7 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) {
ci_test++;
}
}
-
+
outfile.close();
outfile.clear();
return;
@@ -106,13 +106,13 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) {
string file_str;
file_str=path_out+"/"+file_out;
file_str+=".prdt.txt";
-
+
ofstream outfile (file_str.c_str(), ofstream::out);
if (!outfile) {
cout<<"error writing file: "<<file_str.c_str()<<endl;
return;
}
-
+
size_t ci_test=0;
for (size_t i=0; i<indicator_cvt.size(); i++) {
if (indicator_cvt[i]==0) {
@@ -126,7 +126,7 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) {
ci_test++;
}
}
-
+
outfile.close();
outfile.clear();
return;
@@ -134,21 +134,21 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) {
void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
size_t ni_test=u_hat->size, ni_total=G->size1;
-
+
gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test);
- gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
+ gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test);
gsl_vector *eval=gsl_vector_alloc (ni_test);
gsl_vector *Utu=gsl_vector_alloc (ni_test);
gsl_vector *w=gsl_vector_alloc (ni_total);
gsl_permutation *pmt=gsl_permutation_alloc (ni_test);
-
+
//center matrix G based on indicator_idv
for (size_t i=0; i<ni_total; i++) {
gsl_vector_set(w, i, indicator_idv[i]);
}
CenterMatrix(G, w);
-
+
//obtain Koo and Kfo
size_t o_i=0, o_j=0;
double d;
@@ -166,7 +166,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
}
if (indicator_idv[i]==1) {o_i++;}
}
-
+
//matrix operations to get u_prdt
cout<<"Start Eigen-Decomposition..."<<endl;
clock_t time_start=clock();
@@ -177,8 +177,8 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
}
}
- time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
-
+ time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu);
for (size_t i=0; i<eval->size; i++) {
d=gsl_vector_get(eval, i);
@@ -189,7 +189,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
}
gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
-
+
// Free matrices.
gsl_matrix_free(Goo);
gsl_matrix_free(Gfo);
@@ -199,7 +199,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
gsl_vector_free(w);
gsl_permutation_free(pmt);
- return;
+ return;
}
void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
@@ -208,17 +208,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
cout<<"error reading genotype file:"<<file_geno<<endl;
return;
}
-
+
string line;
char *ch_ptr;
string rs;
-
+
size_t n_miss, n_train_nomiss, c_phen;
double geno, x_mean, x_train_mean, effect_size;
-
+
gsl_vector *x=gsl_vector_alloc (y_prdt->size);
gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size);
-
+
ns_test=0;
// Start reading genotypes and analyze.
@@ -227,24 +227,24 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
if (t%d_pace==0 || t==(ns_total-1)) {
ProgressBar ("Reading SNPs ", t, ns_total-1);
}
-
+
ch_ptr=strtok ((char *)line.c_str(), " , \t");
rs=ch_ptr;
ch_ptr=strtok (NULL, " , \t");
- ch_ptr=strtok (NULL, " , \t");
-
+ ch_ptr=strtok (NULL, " , \t");
+
if (mapRS2est.count(rs)==0) {
continue;
} else {
effect_size=mapRS2est[rs];
}
-
+
x_mean=0.0;
c_phen=0;
n_miss=0;
x_train_mean=0;
n_train_nomiss=0;
-
+
gsl_vector_set_zero(x_miss);
for (size_t i=0; i<indicator_idv.size(); ++i) {
@@ -260,10 +260,10 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
gsl_vector_set(x_miss, c_phen, 0.0);
n_miss++;
} else {
- geno=atof(ch_ptr);
-
- gsl_vector_set(x, c_phen, geno);
- gsl_vector_set(x_miss, c_phen, 1.0);
+ geno=atof(ch_ptr);
+
+ gsl_vector_set(x, c_phen, geno);
+ gsl_vector_set(x_miss, c_phen, 1.0);
x_mean+=geno;
}
c_phen++;
@@ -274,12 +274,12 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
cout << "snp " << rs << " has missing genotype for all " <<
"individuals and will be ignored." << endl;
continue;}
-
+
x_mean/=(double)(x->size-n_miss);
x_train_mean/=(double)(n_train_nomiss);
-
-
+
+
for (size_t i=0; i<x->size; ++i) {
geno=gsl_vector_get(x, i);
if (gsl_vector_get (x_miss, i)==0) {
@@ -291,17 +291,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
gsl_vector_scale (x, effect_size);
gsl_vector_add (y_prdt, x);
-
+
ns_test++;
- }
+ }
cout<<endl;
-
+
gsl_vector_free (x);
gsl_vector_free (x_miss);
-
+
infile.close();
infile.clear();
-
+
return;
}
@@ -312,35 +312,35 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
cout<<"error reading bed file:"<<file_bed<<endl;
return;
}
-
+
char ch[1];
- bitset<8> b;
+ bitset<8> b;
string rs;
-
+
size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss;
double geno, x_mean, x_train_mean, effect_size;
-
+
gsl_vector *x=gsl_vector_alloc (y_prdt->size);
-
+
// Calculate n_bit and c, the number of bit for each SNP.
if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
else {n_bit=indicator_idv.size()/4+1; }
-
+
// Print the first 3 magic numbers.
for (size_t i=0; i<3; ++i) {
infile.read(ch,1);
b=ch[0];
- }
-
+ }
+
ns_test=0;
-
+
for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
if (t%d_pace==0 || t==snpInfo.size()-1) {
ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);
}
-
+
rs=snpInfo[t].rs_number;
-
+
if (mapRS2est.count(rs)==0) {
continue;
} else {
@@ -349,7 +349,7 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
// n_bit, and 3 is the number of magic numbers.
infile.seekg(t*n_bit+3);
-
+
// Read genotypes.
x_mean=0.0;
n_miss=0;
@@ -359,7 +359,7 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) &&
ci_total==indicator_idv.size()) {
break;
@@ -404,19 +404,19 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
ci_test++;
}
ci_total++;
-
+
}
}
-
+
if (x->size==n_miss) {
cout << "snp " << rs << " has missing genotype for all " <<
"individuals and will be ignored."<<endl;
continue;
}
-
+
x_mean/=(double)(x->size-n_miss);
x_train_mean/=(double)(n_train_nomiss);
-
+
for (size_t i=0; i<x->size; ++i) {
geno=gsl_vector_get(x, i);
if (geno==-9) {
@@ -425,47 +425,47 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
gsl_vector_set(x, i, geno-x_train_mean);
}
}
-
+
gsl_vector_scale (x, effect_size);
gsl_vector_add (y_prdt, x);
-
+
ns_test++;
- }
+ }
cout<<endl;
-
+
gsl_vector_free (x);
-
+
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
// Predict missing phenotypes using ridge regression.
// Y_hat contains fixed effects
void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
- gsl_matrix *Y_full) {
+ gsl_matrix *Y_full) {
gsl_vector *y_obs=gsl_vector_alloc (np_obs);
gsl_vector *y_miss=gsl_vector_alloc (np_miss);
gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs);
gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs);
gsl_vector *Hiy=gsl_vector_alloc (np_obs);
-
+
size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0;
-
+
// Obtain H_oo, H_mo.
- c_obs1=0; c_miss1=0;
+ c_obs1=0; c_miss1=0;
for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) {
if (indicator_cvt[i1]==0) {continue;}
for (vector<int>::size_type j1=0; j1<n_ph; ++j1) {
-
+
c_obs2=0; c_miss2=0;
for (vector<int>::size_type i2=0;
i2<indicator_pheno.size(); ++i2) {
if (indicator_cvt[i2]==0) {continue;}
for (vector<int>::size_type j2=0;
j2<n_ph; j2++) {
-
+
if (indicator_pheno[i2][j2]==1) {
if (indicator_pheno[i1][j1]==1) {
gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
@@ -476,30 +476,30 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
} else {
c_miss2++;
}
- }
+ }
}
-
+
if (indicator_pheno[i1][j1]==1) {
c_obs1++;
} else {
c_miss1++;
}
}
-
- }
-
+
+ }
+
// Do LU decomposition of H_oo.
int sig;
gsl_permutation * pmt=gsl_permutation_alloc (np_obs);
LUDecomp (H_oo, pmt, &sig);
-
+
// Obtain y_obs=y_full-y_hat.
// Add the fixed effects part to y_miss: y_miss=y_hat.
c_obs1=0; c_miss1=0;
for (vector<int>::size_type i=0;
i<indicator_pheno.size(); ++i) {
if (indicator_cvt[i]==0) {continue;}
-
+
for (vector<int>::size_type j=0; j<n_ph; ++j) {
if (indicator_pheno[i][j]==1) {
gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) );
@@ -509,18 +509,18 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
c_miss1++;
}
}
- }
-
+ }
+
LUSolve (H_oo, pmt, y_obs, Hiy);
-
+
gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
-
+
// Put back predicted y_miss to Y_full.
c_miss1=0;
for (vector<int>::size_type i=0;
i<indicator_pheno.size(); ++i) {
if (indicator_cvt[i]==0) {continue;}
-
+
for (vector<int>::size_type j=0; j<n_ph; ++j) {
if (indicator_pheno[i][j]==0) {
gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) );
@@ -528,14 +528,14 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
}
}
}
-
+
// Free matrices.
gsl_vector_free(y_obs);
gsl_vector_free(y_miss);
gsl_matrix_free(H_oo);
gsl_matrix_free(H_mo);
gsl_vector_free(Hiy);
-
+
return;
}
diff --git a/src/prdt.h b/src/prdt.h
index 2da9fd0..0939b36 100644
--- a/src/prdt.h
+++ b/src/prdt.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __PRDT_H__
+#ifndef __PRDT_H__
#define __PRDT_H__
#include <vector>
@@ -29,30 +29,30 @@
using namespace std;
class PRDT {
-
+
public:
// IO-related parameters.
size_t a_mode;
size_t d_pace;
-
+
string file_bfile;
string file_geno;
string file_out;
string path_out;
-
+
vector<vector<int> > indicator_pheno;
vector<int> indicator_cvt;
vector<int> indicator_idv;
vector<SNPINFO> snpInfo;
map<string, double> mapRS2est;
-
+
size_t n_ph;
size_t np_obs, np_miss;
size_t ns_total;
size_t ns_test;
-
+
double time_eigen;
-
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
diff --git a/src/varcov.cpp b/src/varcov.cpp
index 48a4fc5..46b5bf8 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -28,7 +28,7 @@
#include <cstring>
#include <cmath>
#include <stdio.h>
-#include <stdlib.h>
+#include <stdlib.h>
#include "gsl/gsl_vector.h"
#include "gsl/gsl_matrix.h"
@@ -47,22 +47,22 @@ using namespace std;
void VARCOV::CopyFromParam (PARAM &cPar) {
d_pace=cPar.d_pace;
-
+
file_bfile=cPar.file_bfile;
file_geno=cPar.file_geno;
file_out=cPar.file_out;
path_out=cPar.path_out;
-
+
time_opt=0.0;
window_cm=cPar.window_cm;
window_bp=cPar.window_bp;
window_ns=cPar.window_ns;
-
+
indicator_idv=cPar.indicator_idv;
indicator_snp=cPar.indicator_snp;
snpInfo=cPar.snpInfo;
-
+
return;
}
@@ -82,7 +82,7 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
if (flag==0) {
outfile.open (file_cov.c_str(), ofstream::out);
if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;}
-
+
outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis"
<<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0"
<<"\t"<<"af"<<"\t"<<"window_size"
@@ -111,11 +111,11 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub,
}
}
}
-
+
outfile<<endl;
}
}
-
+
outfile.close();
outfile.clear();
return;
@@ -147,7 +147,7 @@ void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort) {
size_t t2=0, n_nb=0;
for (size_t t=0; t<indicator_snp.size(); ++t) {
if (indicator_snp[t]==0) {continue;}
-
+
if (snpInfo_sort[t].chr=="-9" ||
(snpInfo_sort[t].cM==-9 && window_cm!=0) ||
(snpInfo_sort[t].base_position==-9 && window_bp!=0) ) {
@@ -203,7 +203,7 @@ void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) {
}
return;
-}
+}
// Read the genotype file again, calculate r2 between pairs of SNPs
// within a window, output the file every 10K SNPs the output results
@@ -229,7 +229,7 @@ void VARCOV::AnalyzeBimbam () {
for (size_t i=0; i<indicator_idv.size(); i++) {
ni_test+=indicator_idv[i];
}
-
+
gsl_vector *geno=gsl_vector_alloc (ni_test);
double geno_mean;
@@ -253,29 +253,29 @@ void VARCOV::AnalyzeBimbam () {
if (X_mat.size()==0) {
n_nb=snpInfo[t].n_nb+1;
} else {
- n_nb=snpInfo[t].n_nb-n_nb+1;
+ n_nb=snpInfo[t].n_nb-n_nb+1;
}
for (int i=0; i<n_nb; i++) {
- if (X_mat.size()==0) {t2=t;}
+ if (X_mat.size()==0) {t2=t;}
// Read a line of the snp is filtered out.
inc=0;
while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
- t2++; inc++;
+ t2++; inc++;
}
Bimbam_ReadOneSNP (inc, indicator_idv, infile, geno, geno_mean);
gsl_vector_add_constant (geno, -1.0*geno_mean);
-
+
for (size_t j=0; j<geno->size; j++) {
x_vec[j]=gsl_vector_get(geno, j);
}
X_mat.push_back(x_vec);
t2++;
- }
-
+ }
+
n_nb=snpInfo[t].n_nb;
Calc_Cor(X_mat, cov_vec);
@@ -301,8 +301,8 @@ void VARCOV::AnalyzeBimbam () {
gsl_vector_free(geno);
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
@@ -314,7 +314,7 @@ void VARCOV::AnalyzePlink () {
// Calculate the number of right-hand-side neighbours for each SNP.
vector<SNPINFO> snpInfo_sub;
CalcNB(snpInfo);
-
+
size_t ni_test=0;
for (size_t i=0; i<indicator_idv.size(); i++) {
ni_test+=indicator_idv[i];
@@ -343,30 +343,30 @@ void VARCOV::AnalyzePlink () {
if (X_mat.size()==0) {
n_nb=snpInfo[t].n_nb+1;
} else {
- n_nb=snpInfo[t].n_nb-n_nb+1;
+ n_nb=snpInfo[t].n_nb-n_nb+1;
}
for (int i=0; i<n_nb; i++) {
- if (X_mat.size()==0) {t2=t;}
+ if (X_mat.size()==0) {t2=t;}
// Read a line if the SNP is filtered out.
inc=0;
while (t2<indicator_snp.size() && indicator_snp[t2]==0) {
t2++;
- inc++;
+ inc++;
}
Plink_ReadOneSNP (t2, indicator_idv, infile, geno, geno_mean);
gsl_vector_add_constant (geno, -1.0*geno_mean);
-
+
for (size_t j=0; j<geno->size; j++) {
x_vec[j]=gsl_vector_get(geno, j);
}
X_mat.push_back(x_vec);
t2++;
- }
-
+ }
+
n_nb=snpInfo[t].n_nb;
Calc_Cor(X_mat, cov_vec);
@@ -392,7 +392,7 @@ void VARCOV::AnalyzePlink () {
gsl_vector_free(geno);
infile.close();
- infile.clear();
-
+ infile.clear();
+
return;
}
diff --git a/src/varcov.h b/src/varcov.h
index 3b45123..4a1eb3a 100644
--- a/src/varcov.h
+++ b/src/varcov.h
@@ -16,7 +16,7 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __VARCOV_H__
+#ifndef __VARCOV_H__
#define __VARCOV_H__
#include "gsl/gsl_vector.h"
@@ -47,7 +47,7 @@ public:
double window_cm;
size_t window_bp;
size_t window_ns;
-
+
// Main functions.
void CopyFromParam (PARAM &cPar);
void CopyToParam (PARAM &cPar);
diff --git a/src/vc.cpp b/src/vc.cpp
index 9fe1894..e8ccece 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -175,7 +175,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1);
double sigma2;
-
+
// Calculate H = \sum_i^{k+1} \sigma_i^2 K_i.
gsl_matrix_set_zero (p->P);
for (size_t i=0; i<n_vc+1; i++) {
@@ -196,7 +196,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
gsl_matrix_scale(K_temp, sigma2);
gsl_matrix_add (p->P, K_temp);
}
-
+
// Calculate H^{-1}.
eigenlib_invert(p->P);
@@ -211,7 +211,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
// Calculate Py, KPy, PKPy.
gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py);
-
+
double d;
for (size_t i=0; i<n_vc+1; i++) {
gsl_vector_view KPy=gsl_matrix_column (p->KPy_mat, i);
@@ -221,7 +221,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) {
gsl_vector_memcpy (&KPy.vector, p->Py);
} else {
gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1);
-
+
// Seems to be important to use gsl dgemv here instead of
// eigenlib_dgemv; otherwise.
gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0,
@@ -643,7 +643,7 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps,
}
while (!safeGetline(infile, line).eof()) {
-
+
//do not read cor values this time; upto col_n-1.
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -942,7 +942,7 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
ReadHeader_vc (line, header);
while (!safeGetline(infile, line).eof()) {
-
+
// Do not read cor values this time; upto col_n-1.
d_pos1=0; d_cm1=0;
ch_ptr=strtok ((char *)line.c_str(), " , \t");
@@ -1115,7 +1115,7 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs,
mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor;
}
cout<<crt_factor<<endl;
-
+
// Correct qvar.
if (i==j) {
vec_qvar[i]*=crt_factor;
@@ -1198,7 +1198,7 @@ void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat,
// Get qvar_mat.
gsl_matrix_memcpy (qvar_mat, Vq);
gsl_matrix_scale (qvar_mat, 1.0/(df*df));
-
+
// Calculate variance for these estimates.
for (size_t i=0; i<n_vc; i++) {
for (size_t j=i; j<n_vc; j++) {
@@ -1873,7 +1873,7 @@ void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W,
size_t it=0;
double s=1;
while (abs(s)>1e-3 && it<100) {
-
+
// Update tau_inv.
gsl_blas_ddot (q_vec, pve, &d);
if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;}
@@ -2143,7 +2143,7 @@ bool PlinkXwz (const string &file_bed, const int display_pace,
gsl_vector_mul(wz, w);
int n_bit;
-
+
// Calculate n_bit and c, the number of bit for each snp.
if (ni_total%4==0) {n_bit=ni_total/4;}
else {n_bit=ni_total/4+1; }
@@ -2170,7 +2170,7 @@ bool PlinkXwz (const string &file_bed, const int display_pace,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0.
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==ni_total) {
break;
}
@@ -2393,7 +2393,7 @@ bool PlinkXtXwz (const string &file_bed, const int display_pace,
if (indicator_snp[t]==0) {continue;}
// n_bit, and 3 is the number of magic numbers.
- infile.seekg(t*n_bit+3);
+ infile.seekg(t*n_bit+3);
// Read genotypes.
geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0;
@@ -2402,7 +2402,7 @@ bool PlinkXtXwz (const string &file_bed, const int display_pace,
b=ch[0];
// Minor allele homozygous: 2.0; major: 0.0;
- for (size_t j=0; j<4; ++j) {
+ for (size_t j=0; j<4; ++j) {
if ((i==(n_bit-1)) && ci_total==ni_total) {
break;
}