diff options
author | Peter Carbonetto | 2017-07-07 11:20:56 -0500 |
---|---|---|
committer | GitHub | 2017-07-07 11:20:56 -0500 |
commit | 86e96ede4ff0955bb2d03ac6c1bd7562a3984955 (patch) | |
tree | 33120540091e7d16b58f389a13949df397535912 | |
parent | b3747413e6c5c8cd447e979157880676da66a342 (diff) | |
parent | b9758364059d52e153a9f1b4fcae3bc3f3e68422 (diff) | |
download | pangemma-86e96ede4ff0955bb2d03ac6c1bd7562a3984955.tar.gz |
Merge pull request #51 from genenetwork/spacing
Spacing fixes.
-rw-r--r-- | Makefile.linux | 10 | ||||
-rw-r--r-- | Makefile.macosx | 8 | ||||
-rw-r--r-- | README.md | 6 | ||||
-rw-r--r-- | doc/manual.tex | 30 | ||||
-rw-r--r-- | src/bslmm.cpp | 780 | ||||
-rw-r--r-- | src/bslmm.h | 48 | ||||
-rw-r--r-- | src/bslmmdap.cpp | 60 | ||||
-rw-r--r-- | src/bslmmdap.h | 36 | ||||
-rw-r--r-- | src/eigenlib.cpp | 4 | ||||
-rw-r--r-- | src/eigenlib.h | 2 | ||||
-rw-r--r-- | src/gemma.cpp | 46 | ||||
-rw-r--r-- | src/gemma.h | 8 | ||||
-rw-r--r-- | src/gzstream.cpp | 6 | ||||
-rw-r--r-- | src/gzstream.h | 16 | ||||
-rw-r--r-- | src/lapack.h | 2 | ||||
-rw-r--r-- | src/ldr.cpp | 18 | ||||
-rw-r--r-- | src/ldr.h | 26 | ||||
-rw-r--r-- | src/lm.cpp | 6 | ||||
-rw-r--r-- | src/lm.h | 2 | ||||
-rw-r--r-- | src/lmm.cpp | 50 | ||||
-rw-r--r-- | src/lmm.h | 4 | ||||
-rw-r--r-- | src/logistic.h | 150 | ||||
-rw-r--r-- | src/main.cpp | 30 | ||||
-rw-r--r-- | src/mathfunc.cpp | 2 | ||||
-rw-r--r-- | src/param.cpp | 24 | ||||
-rw-r--r-- | src/param.h | 6 | ||||
-rw-r--r-- | src/prdt.cpp | 188 | ||||
-rw-r--r-- | src/prdt.h | 14 | ||||
-rw-r--r-- | src/varcov.cpp | 56 | ||||
-rw-r--r-- | src/varcov.h | 4 | ||||
-rw-r--r-- | src/vc.cpp | 26 |
31 files changed, 834 insertions, 834 deletions
diff --git a/Makefile.linux b/Makefile.linux index 0b2c0b9..b51bbfe 100644 --- a/Makefile.linux +++ b/Makefile.linux @@ -14,8 +14,8 @@ SYS = LNX # Disable WITH_LAPACK option can slow computation speed significantly and is not recommended # Disable WITH_ARPACK option only disable -apprx option in the software WITH_LAPACK = 1 -FORCE_32BIT = -FORCE_DYNAMIC = +FORCE_32BIT = +FORCE_DYNAMIC = DIST_NAME = gemma-0.96alpha # -------------------------------------------------------------------- @@ -45,7 +45,7 @@ OUTPUT = $(BIN_DIR)/gemma SOURCES = $(SRC_DIR)/main.cpp -HDR = +HDR = # Detailed libary paths, D for dynamic and S for static LIBS_LNX_D_LAPACK = -llapack @@ -54,7 +54,7 @@ LIBS_LNX_S_LAPACK = /software/atlas-3.10.3-el7-x86_64/lib/liblapack.a \ /software/atlas-3.10.3-el7-x86_64/lib/libcblas.a \ /software/atlas-3.10.3-el7-x86_64/lib/libf77blas.a \ /software/atlas-3.10.3-el7-x86_64/lib/libatlas.a -lgfortran \ - -Wl,--allow-multiple-definition + -Wl,--allow-multiple-definition SOURCES += $(SRC_DIR)/param.cpp $(SRC_DIR)/gemma.cpp $(SRC_DIR)/io.cpp $(SRC_DIR)/lm.cpp $(SRC_DIR)/lmm.cpp $(SRC_DIR)/vc.cpp $(SRC_DIR)/mvlmm.cpp $(SRC_DIR)/bslmm.cpp $(SRC_DIR)/prdt.cpp $(SRC_DIR)/mathfunc.cpp $(SRC_DIR)/gzstream.cpp $(SRC_DIR)/eigenlib.cpp $(SRC_DIR)/ldr.cpp $(SRC_DIR)/bslmmdap.cpp $(SRC_DIR)/logistic.cpp $(SRC_DIR)/varcov.cpp HDR += $(SRC_DIR)/param.h $(SRC_DIR)/gemma.h $(SRC_DIR)/io.h $(SRC_DIR)/lm.h $(SRC_DIR)/lmm.h $(SRC_DIR)/vc.h $(SRC_DIR)/mvlmm.h $(SRC_DIR)/bslmm.h $(SRC_DIR)/prdt.h $(SRC_DIR)/mathfunc.h $(SRC_DIR)/gzstream.h $(SRC_DIR)/eigenlib.h @@ -91,7 +91,7 @@ $(OUTPUT): $(OBJS) $(OBJS) : $(HDR) -.cpp.o: +.cpp.o: $(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o .SUFFIXES : .cpp .c .o $(SUFFIXES) diff --git a/Makefile.macosx b/Makefile.macosx index 992e442..00467d6 100644 --- a/Makefile.macosx +++ b/Makefile.macosx @@ -14,7 +14,7 @@ SYS = MAC # Disable WITH_LAPACK option can slow computation speed significantly and is not recommended # Disable WITH_ARPACK option only disable -apprx option in the software WITH_LAPACK = 1 -FORCE_32BIT = +FORCE_32BIT = FORCE_DYNAMIC = 1 DIST_NAME = gemma-0.96 @@ -37,13 +37,13 @@ OUTPUT = $(BIN_DIR)/gemma SOURCES = $(SRC_DIR)/main.cpp -HDR = +HDR = # Detailed libary paths, D for dynamic and S for static LIBS_LNX_D_LAPACK = -llapack LIBS_MAC_D_LAPACK = -framework Accelerate -LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition +LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition # Options @@ -82,7 +82,7 @@ $(OUTPUT): $(OBJS) $(OBJS) : $(HDR) -.cpp.o: +.cpp.o: $(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o .SUFFIXES : .cpp .c .o $(SUFFIXES) @@ -61,7 +61,7 @@ MQS algorithm to estimate variance components. If you use GEMMA for published work, please cite our paper: + Xiang Zhou and Matthew Stephens (2012). [Genome-wide efficient -mixed-model analysis for association studies.](http://doi.org/10.1038/ng.2310) +mixed-model analysis for association studies.](http://doi.org/10.1038/ng.2310) *Nature Genetics* **44**, 821–824. If you use the multivariate linear mixed model (mvLMM) in your @@ -77,7 +77,7 @@ If you use the Bayesian sparse linear mixed model (BSLMM), please cite: + Xiang Zhou, Peter Carbonetto and Matthew Stephens (2013). [Polygenic modeling with bayesian sparse linear mixed models.](http://doi.org/10.1371/journal.pgen.1003264) *PLoS Genetics* -**9**, e1003264. +**9**, e1003264. And if you use of the variance component estimation using summary statistics, please cite: @@ -118,7 +118,7 @@ This is the current structure of the GEMMA source repository: subfolders; see Wilson et al "Good Enough Practices" paper for example of this.* -## Setup +## Setup There are two ways to install GEMMA: diff --git a/doc/manual.tex b/doc/manual.tex index d47aa22..c0119dc 100644 --- a/doc/manual.tex +++ b/doc/manual.tex @@ -100,11 +100,11 @@ available open-source numerical libraries. Stephens (2014). Efficient multivariate linear mixed model algorithms for genome-wide association studies. Nature Methods. 11: 407-409. - + \item Bayesian sparse linear mixed models \\ Xiang Zhou, Peter Carbonetto and Matthew Stephens (2013). Polygenic modeling with Bayesian sparse linear mixed models. PLoS Genetics. 9(2): e1003264. - + \item Variance component estimation with individual-level or summary data\\ Xiang Zhou (2016). A unified framework for variance component estimation with summary statistics in genome-wide association @@ -212,10 +212,10 @@ effects terms ($\mathbf X\boldsymbol\beta$). These two parameters are defined as follows: \begin{align*} \mbox{PVE}(\bbeta, \bu, \tau) & - :=\frac{\mbox{V}(\bX\bbeta+\bu)}{\mbox{V}(\bX\bbeta+\bu)+\tau^{-1}}, \\ + :=\frac{\mbox{V}(\bX\bbeta+\bu)}{\mbox{V}(\bX\bbeta+\bu)+\tau^{-1}}, \\ \mbox{PGE}(\bbeta, \bu) & :=\frac{\mbox{V}(\bX\bbeta)}{\mbox{V}(\bX\bbeta+\bu)}, \\ -\intertext{where} +\intertext{where} \mbox{V}({\bf x}) &:=\frac{1}{n}\sum_{i=1}^n (x_i-\overline {x})^2. \end{align*} @@ -429,7 +429,7 @@ BIMBAM mean genotype file from IMPUTE genotype files (\url{http://www.stats.ox.ac.uk/~marchini/software/gwas/file_format.html})\cite{Howie:2009}: % \begin{verbatim} -cat [impute filename] | awk -v s=[number of samples/individuals] +cat [impute filename] | awk -v s=[number of samples/individuals] '{ printf $2 "," $4 "," $5; for(i=1; i<=s; i++) printf "," $(i*3+3)*2+$(i*3+4); printf "\n" }' > [bimbam filename] \end{verbatim} @@ -815,7 +815,7 @@ chr rs ps n_mis n_obs allele1 allele0 af beta se \end{verbatim} % -The 11 columns are: chromosome numbers, snp ids, base pair positions on the chromosome, number of missing individuals for a given snp, number of non-missing individuals for a given snp, minor allele, major allele, allele frequency, beta estimates, standard errors for beta, and $p$ values from the Wald test. +The 11 columns are: chromosome numbers, snp ids, base pair positions on the chromosome, number of missing individuals for a given snp, number of non-missing individuals for a given snp, minor allele, major allele, allele frequency, beta estimates, standard errors for beta, and $p$ values from the Wald test. \subsection{Estimate Relatedness Matrix from Genotypes} @@ -1009,7 +1009,7 @@ ped format or the BIMBAM format are: \begin{verbatim} ./gemma -bfile [prefix] -k [filename] -lmm [num] -n [num1] [num2] [num3] -o [prefix] -./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] +./gemma -g [filename] -p [filename] -a [filename] -k [filename] -lmm [num] -n [num1] [num2] [num3] -o [prefix] \end{verbatim} @@ -1041,7 +1041,7 @@ missing, one can impute these missing values before association tests: \begin{verbatim} ./gemma -bfile [prefix] -k [filename] -predict -n [num1] [num2] [num3] -o [prefix] -./gemma -g [filename] -p [filename] -a [filename] -k [filename] -predict +./gemma -g [filename] -p [filename] -a [filename] -k [filename] -predict -n [num1] [num2] [num3] -o [prefix] \end{verbatim} @@ -1154,10 +1154,10 @@ below: % \begin{verbatim} h pve rho pge pi n_gamma -4.777635e-01 5.829042e-01 4.181280e-01 4.327976e-01 2.106763e-03 25 -5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26 -5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26 -6.361674e-01 6.461678e-01 3.130355e-01 3.659850e-01 2.188401e-03 25 +4.777635e-01 5.829042e-01 4.181280e-01 4.327976e-01 2.106763e-03 25 +5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26 +5.278073e-01 5.667885e-01 3.339020e-01 4.411859e-01 2.084355e-03 26 +6.361674e-01 6.461678e-01 3.130355e-01 3.659850e-01 2.188401e-03 25 5.479237e-01 6.228036e-01 3.231856e-01 4.326231e-01 2.164183e-03 27 \end{verbatim} % @@ -1196,9 +1196,9 @@ The basic usages for association analysis with either the PLINK binary ped format or the BIMBAM format are: \begin{verbatim} -./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] +./gemma -bfile [prefix] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix] -./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] +./gemma -g [filename] -p [filename] -epm [filename] -emu [filename] -ebv [filename] -k [filename] -predict [num] -o [prefix] \end{verbatim} @@ -1544,7 +1544,7 @@ source code for detailed examples. \item \textcolor{red}{-ci [num]} \quad specify fitting algorithm to compute the standard errors. (default 1; valid value 1-2; 1: MQS-HEW; 2: MQS-LDW.) \end{itemize} % - + \clearpage \bibliographystyle{plain} diff --git a/src/bslmm.cpp b/src/bslmm.cpp index 563b743..d579802 100644 --- a/src/bslmm.cpp +++ b/src/bslmm.cpp @@ -1,17 +1,17 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ @@ -24,7 +24,7 @@ #include <cmath> #include <iostream> #include <stdio.h> -#include <stdlib.h> +#include <stdlib.h> #include <ctime> #include <cstring> #include <algorithm> @@ -50,32 +50,32 @@ using namespace std; void BSLMM::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; - + file_bfile=cPar.file_bfile; file_geno=cPar.file_geno; file_out=cPar.file_out; path_out=cPar.path_out; - - l_min=cPar.h_min; - l_max=cPar.h_max; - n_region=cPar.n_region; + + l_min=cPar.h_min; + l_max=cPar.h_max; + n_region=cPar.n_region; pve_null=cPar.pve_null; pheno_mean=cPar.pheno_mean; - + time_UtZ=0.0; time_Omega=0.0; n_accept=0; - - h_min=cPar.h_min; - h_max=cPar.h_max; + + h_min=cPar.h_min; + h_max=cPar.h_max; h_scale=cPar.h_scale; - rho_min=cPar.rho_min; - rho_max=cPar.rho_max; + rho_min=cPar.rho_min; + rho_max=cPar.rho_max; rho_scale=cPar.rho_scale; - logp_min=cPar.logp_min; - logp_max=cPar.logp_max; + logp_min=cPar.logp_min; + logp_max=cPar.logp_max; logp_scale=cPar.logp_scale; - + s_min=cPar.s_min; s_max=cPar.s_max; w_step=cPar.w_step; @@ -86,17 +86,17 @@ void BSLMM::CopyFromParam (PARAM &cPar) { geo_mean=cPar.geo_mean; randseed=cPar.randseed; trace_G=cPar.trace_G; - + ni_total=cPar.ni_total; ns_total=cPar.ns_total; ni_test=cPar.ni_test; ns_test=cPar.ns_test; n_cvt=cPar.n_cvt; - + indicator_idv=cPar.indicator_idv; indicator_snp=cPar.indicator_snp; snpInfo=cPar.snpInfo; - + return; } @@ -108,7 +108,7 @@ void BSLMM::CopyToParam (PARAM &cPar) { cPar.n_accept=n_accept; cPar.pheno_mean=pheno_mean; cPar.randseed=randseed; - + return; } @@ -119,28 +119,28 @@ void BSLMM::WriteBV (const gsl_vector *bv) { ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; + cout<<"error writing file: "<<file_str.c_str()<<endl; return; } - + size_t t=0; for (size_t i=0; i<ni_total; ++i) { if (indicator_idv[i]==0) { outfile<<"NA"<<endl; - } + } else { outfile<<scientific<<setprecision(6)<< gsl_vector_get(bv, t)<<endl; t++; } - } - - outfile.clear(); - outfile.close(); + } + + outfile.clear(); + outfile.close(); return; } -void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, +void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, const gsl_vector *alpha, const size_t w) { string file_str; file_str=path_out+"/"+file_out; @@ -148,20 +148,20 @@ void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; + cout<<"error writing file: "<<file_str.c_str()<<endl; return;} - + outfile<<"chr"<<"\t"<<"rs"<<"\t" <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t" <<"beta"<<"\t"<<"gamma"<<endl; - + size_t t=0; for (size_t i=0; i<ns_total; ++i) { - if (indicator_snp[i]==0) {continue;} - + if (indicator_snp[i]==0) {continue;} + outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t" <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; - + outfile<<scientific<<setprecision(6)<< gsl_vector_get(alpha, t)<<"\t"; if (beta_g[t].second!=0) { @@ -172,10 +172,10 @@ void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, outfile<<0.0<<"\t"<<0.0<<endl; } t++; - } - - outfile.clear(); - outfile.close(); + } + + outfile.clear(); + outfile.close(); return; } @@ -186,17 +186,17 @@ void BSLMM::WriteParam (const gsl_vector *alpha) { ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) { - cout<<"error writing file: "<<file_str.c_str()<<endl; + cout<<"error writing file: "<<file_str.c_str()<<endl; return; } - + outfile<<"chr"<<"\t"<<"rs"<<"\t" <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t" <<"beta"<<"\t"<<"gamma"<<endl; - + size_t t=0; for (size_t i=0; i<ns_total; ++i) { - if (indicator_snp[i]==0) {continue;} + if (indicator_snp[i]==0) {continue;} outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<< snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; @@ -204,14 +204,14 @@ void BSLMM::WriteParam (const gsl_vector *alpha) { gsl_vector_get(alpha, t)<<"\t"; outfile<<0.0<<"\t"<<0.0<<endl; t++; - } - - outfile.clear(); - outfile.close(); + } + + outfile.clear(); + outfile.close(); return; } -void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, +void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl_matrix *Result_gamma, const size_t w_col) { string file_gamma, file_hyp; file_gamma=path_out+"/"+file_out; @@ -220,21 +220,21 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, file_hyp+=".hyp.txt"; ofstream outfile_gamma, outfile_hyp; - + if (flag==0) { outfile_gamma.open (file_gamma.c_str(), ofstream::out); outfile_hyp.open (file_hyp.c_str(), ofstream::out); if (!outfile_gamma) { - cout<<"error writing file: "<<file_gamma<<endl; + cout<<"error writing file: "<<file_gamma<<endl; return; } if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; + cout<<"error writing file: "<<file_hyp<<endl; return; } - + outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl; - + for (size_t i=0; i<s_max; ++i) { outfile_gamma<<"s"<<i<<"\t"; } @@ -244,18 +244,18 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, outfile_gamma.open (file_gamma.c_str(), ofstream::app); outfile_hyp.open (file_hyp.c_str(), ofstream::app); if (!outfile_gamma) { - cout<<"error writing file: "<<file_gamma<<endl; + cout<<"error writing file: "<<file_gamma<<endl; return; } if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; + cout<<"error writing file: "<<file_hyp<<endl; return; } - + size_t w; if (w_col==0) {w=w_pace;} else {w=w_col;} - + for (size_t i=0; i<w; ++i) { outfile_hyp<<scientific; for (size_t j=0; j<4; ++j) { @@ -267,7 +267,7 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, outfile_hyp<<(int)gsl_matrix_get(Result_hyp,i,5)<<"\t"; outfile_hyp<<endl; } - + for (size_t i=0; i<w; ++i) { for (size_t j=0; j<s_max; ++j) { outfile_gamma<< @@ -275,13 +275,13 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, } outfile_gamma<<endl; } - + } - + outfile_hyp.close(); outfile_hyp.clear(); outfile_gamma.close(); - outfile_gamma.clear(); + outfile_gamma.clear(); return; } @@ -300,7 +300,7 @@ void BSLMM::CalcPgamma (double *p_gamma) { return; } -void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, +void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank) { size_t pos; for (size_t i=0; i<rank.size(); ++i) { @@ -309,32 +309,32 @@ void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos); gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector); } - + return; } -double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, +double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const double sigma_a2) { - double pve, var_y; - + double pve, var_y; + gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2); gsl_vector *OiXty=gsl_vector_alloc (UtXgamma->size2); gsl_matrix_set_identity (Omega); - gsl_matrix_scale (Omega, 1.0/sigma_a2); + gsl_matrix_scale (Omega, 1.0/sigma_a2); lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega); gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); CholeskySolve(Omega, Xty, OiXty); - + gsl_blas_ddot (Xty, OiXty, &pve); gsl_blas_ddot (Uty, Uty, &var_y); - + pve/=var_y; - + gsl_matrix_free (Omega); gsl_vector_free (Xty); gsl_vector_free (OiXty); @@ -342,28 +342,28 @@ double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, return pve; } -void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, - vector<size_t> &rank, class HYPBSLMM &cHyp, +void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, + vector<size_t> &rank, class HYPBSLMM &cHyp, vector<pair<size_t, double> > &pos_loglr) { double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1); - + cHyp.n_gamma=0; for (size_t i=0; i<pos_loglr.size(); ++i) { if (2.0*pos_loglr[i].second>q_genome) {cHyp.n_gamma++;} } if (cHyp.n_gamma<10) {cHyp.n_gamma=10;} - + if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;} - if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;} - + if (cHyp.n_gamma<s_min) {cHyp.n_gamma=s_min;} + rank.clear(); for (size_t i=0; i<cHyp.n_gamma; ++i) { rank.push_back(i); } - + cHyp.logp=log((double)cHyp.n_gamma/(double)ns_test); - cHyp.h=pve_null; - + cHyp.h=pve_null; + if (cHyp.logp==0) {cHyp.logp=-0.000001;} if (cHyp.h==0) {cHyp.h=0.1;} @@ -376,114 +376,114 @@ void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, } else { sigma_a2=cHyp.h*1.0/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); } - if (sigma_a2==0) {sigma_a2=0.025;} + if (sigma_a2==0) {sigma_a2=0.025;} cHyp.rho=CalcPveLM (UtXgamma, Uty, sigma_a2)/cHyp.h; gsl_matrix_free (UtXgamma); - + if (cHyp.rho>1.0) {cHyp.rho=1.0;} - + if (cHyp.h<h_min) {cHyp.h=h_min;} if (cHyp.h>h_max) {cHyp.h=h_max;} if (cHyp.rho<rho_min) {cHyp.rho=rho_min;} if (cHyp.rho>rho_max) {cHyp.rho=rho_max;} if (cHyp.logp<logp_min) {cHyp.logp=logp_min;} if (cHyp.logp>logp_max) {cHyp.logp=logp_max;} - + cout<<"initial value of h = "<<cHyp.h<<endl; cout<<"initial value of rho = "<<cHyp.rho<<endl; cout<<"initial value of pi = "<<exp(cHyp.logp)<<endl; cout<<"initial value of |gamma| = "<<cHyp.n_gamma<<endl; - + return; } -double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *Utu, gsl_vector *alpha_prime, +double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *Utu, gsl_vector *alpha_prime, class HYPBSLMM &cHyp) { double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h)); - - gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size); + + gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size); gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size); - + double logpost=0.0; double d, ds, uy, Hi_yy=0, logdet_H=0.0; for (size_t i=0; i<ni_test; ++i) { d=gsl_vector_get (K_eval, i)*sigma_b2; ds=d/(d+1.0); - d=1.0/(d+1.0); + d=1.0/(d+1.0); gsl_vector_set (weight_Hi, i, d); - + logdet_H-=log(d); uy=gsl_vector_get (Uty, i); Hi_yy+=d*uy*uy; - - gsl_vector_set (Utu_rand, i, + + gsl_vector_set (Utu_rand, i, gsl_ran_gaussian(gsl_r, 1)*sqrt(ds)); } - + // Sample tau. double tau=1.0; if (a_mode==11) { - tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy); + tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy); } - + // Sample alpha. gsl_vector_memcpy (alpha_prime, Uty); gsl_vector_mul (alpha_prime, weight_Hi); gsl_vector_scale (alpha_prime, sigma_b2); - + // Sample u. gsl_vector_memcpy (Utu, alpha_prime); gsl_vector_mul (Utu, K_eval); if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} - gsl_vector_add (Utu, Utu_rand); - + gsl_vector_add (Utu, Utu_rand); + // For quantitative traits, calculate pve and ppe. if (a_mode==11) { gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=d/(double)ni_test; + cHyp.pve=d/(double)ni_test; cHyp.pve/=cHyp.pve+1.0/tau; - cHyp.pge=0.0; + cHyp.pge=0.0; } // Calculate likelihood. logpost=-0.5*logdet_H; if (a_mode==11) {logpost-=0.5*(double)ni_test*log(Hi_yy);} else {logpost-=0.5*Hi_yy;} - + logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); - + gsl_vector_free (Utu_rand); gsl_vector_free (weight_Hi); - + return logpost; } -double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *UtXb, gsl_vector *Utu, - gsl_vector *alpha_prime, gsl_vector *beta, +double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *UtXb, gsl_vector *Utu, + gsl_vector *alpha_prime, gsl_vector *beta, class HYPBSLMM &cHyp) { - clock_t time_start; - + clock_t time_start; + double sigma_a2=cHyp.h*cHyp.rho/ (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h)); - + double logpost=0.0; double d, ds, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0; - - gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, - UtXgamma->size2); + + gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, + UtXgamma->size2); gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1); + gsl_vector *Utu_rand=gsl_vector_alloc (UtXgamma->size1); gsl_vector *weight_Hi=gsl_vector_alloc (UtXgamma->size1); - + gsl_matrix_memcpy (UtXgamma_eval, UtXgamma); - + logdet_H=0.0; P_yy=0.0; for (size_t i=0; i<ni_test; ++i) { gsl_vector_view UtXgamma_row= @@ -492,139 +492,139 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, ds=d/(d+1.0); d=1.0/(d+1.0); gsl_vector_set (weight_Hi, i, d); - + logdet_H-=log(d); uy=gsl_vector_get (Uty, i); P_yy+=d*uy*uy; gsl_vector_scale (&UtXgamma_row.vector, d); - + gsl_vector_set(Utu_rand,i,gsl_ran_gaussian(gsl_r,1)*sqrt(ds)); } - + // Calculate Omega. gsl_matrix_set_identity (Omega); - + time_start=clock(); lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega); time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - + + // Calculate beta_hat. gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); logdet_O=CholeskySolve(Omega, XtHiy, beta_hat); - + gsl_vector_scale (beta_hat, sigma_a2); gsl_blas_ddot (XtHiy, beta_hat, &d); P_yy-=d; - + // Sample tau. double tau=1.0; if (a_mode==11) { - tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); + tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); } // Sample beta. for (size_t i=0; i<beta->size; i++) { - d=gsl_ran_gaussian(gsl_r, 1); - gsl_vector_set(beta, i, d); + d=gsl_ran_gaussian(gsl_r, 1); + gsl_vector_set(beta, i, d); } - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); - + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); + // This computes inv(L^T(Omega)) %*% beta. gsl_vector_scale(beta, sqrt(sigma_a2/tau)); - gsl_vector_add(beta, beta_hat); + gsl_vector_add(beta, beta_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); - + // Sample alpha. gsl_vector_memcpy (alpha_prime, Uty); gsl_vector_sub (alpha_prime, UtXb); gsl_vector_mul (alpha_prime, weight_Hi); gsl_vector_scale (alpha_prime, sigma_b2); - + // Sample u. gsl_vector_memcpy (Utu, alpha_prime); gsl_vector_mul (Utu, K_eval); - + if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} - gsl_vector_add (Utu, Utu_rand); - + gsl_vector_add (Utu, Utu_rand); + // For quantitative traits, calculate pve and pge. if (a_mode==11) { gsl_blas_ddot (UtXb, UtXb, &d); cHyp.pge=d/(double)ni_test; - + gsl_blas_ddot (Utu, Utu, &d); cHyp.pve=cHyp.pge+d/(double)ni_test; - + if (cHyp.pve==0) {cHyp.pge=0.0;} else {cHyp.pge/=cHyp.pve;} - cHyp.pve/=cHyp.pve+1.0/tau; - } + cHyp.pve/=cHyp.pve+1.0/tau; + } gsl_matrix_free (UtXgamma_eval); gsl_matrix_free (Omega); gsl_vector_free (XtHiy); gsl_vector_free (beta_hat); - gsl_vector_free (Utu_rand); + gsl_vector_free (Utu_rand); gsl_vector_free (weight_Hi); - + logpost=-0.5*logdet_H-0.5*logdet_O; if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} else {logpost-=0.5*P_yy;} logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); - + return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, +void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; - + gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=d/(double)ni_test; - + cHyp.pve=d/(double)ni_test; + gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat); - + cHyp.pve/=cHyp.pve+1.0; - cHyp.pge=0.0; - + cHyp.pge=0.0; + return; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, - const gsl_vector *Utu, gsl_vector *z_hat, +void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; gsl_vector *UtXbU=gsl_vector_alloc (Utu->size); - + gsl_blas_ddot (UtXb, UtXb, &d); cHyp.pge=d/(double)ni_test; - + gsl_blas_ddot (Utu, Utu, &d); cHyp.pve=cHyp.pge+d/(double)ni_test; - + gsl_vector_memcpy (UtXbU, Utu); gsl_vector_add (UtXbU, UtXb); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); - + gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); + if (cHyp.pve==0) {cHyp.pge=0.0;} else {cHyp.pge/=cHyp.pve;} - + cHyp.pve/=cHyp.pve+1.0; - + gsl_vector_free(UtXbU); return; } -void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, - gsl_vector *z) { +void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, + gsl_vector *z) { double d1, d2, z_rand=0.0; for (size_t i=0; i<z->size; ++i) { d1=gsl_vector_get (y, i); @@ -634,7 +634,7 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, if (d1<=0.0) { // Control, right truncated. - do { + do { z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); } while (z_rand>0.0); } @@ -643,25 +643,25 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); } while (z_rand<0.0); } - + gsl_vector_set (z, i, z_rand); } return; } -double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, +double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) { - + double h=cHyp_old.h, rho=cHyp_old.rho; - + double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale; - + for (size_t i=0; i<repeat; ++i) { h=h+(gsl_rng_uniform(gsl_r)-0.5)*d_h; if (h<h_min) {h=2*h_min-h;} if (h>h_max) {h=2*h_max-h;} - + rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho; if (rho<rho_min) {rho=2*rho_min-rho;} if (rho>rho_max) {rho=2*rho_max-rho;} @@ -671,13 +671,13 @@ double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, return 0.0; } -double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, +double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) { double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp; double log_ratio=0.0; - + double d_logp=min(0.1, (logp_max-logp_min)*logp_scale); - + for (size_t i=0; i<repeat; ++i) { logp_new=logp_old+(gsl_rng_uniform(gsl_r)-0.5)*d_logp; if (logp_new<logp_min) {logp_new=2*logp_min-logp_new;} @@ -686,29 +686,29 @@ double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, logp_old=logp_new; } cHyp_new.logp=logp_new; - + return log_ratio; } bool comp_vec (size_t a, size_t b) { - return (a < b); + return (a < b); } -double BSLMM::ProposeGamma (const vector<size_t> &rank_old, - vector<size_t> &rank_new, - const double *p_gamma, - const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, +double BSLMM::ProposeGamma (const vector<size_t> &rank_old, + vector<size_t> &rank_new, + const double *p_gamma, + const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { map<size_t, int> mapRank2in; size_t r; double unif, logp=0.0; int flag_gamma; size_t r_add, r_remove, col_id; - + rank_new.clear(); if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"<<endl;} - + if (cHyp_old.n_gamma!=0) { for (size_t i=0; i<rank_old.size(); ++i) { r=rank_old[i]; @@ -716,29 +716,29 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, mapRank2in[r]=1; } } - cHyp_new.n_gamma=cHyp_old.n_gamma; - + cHyp_new.n_gamma=cHyp_old.n_gamma; + for (size_t i=0; i<repeat; ++i) { - unif=gsl_rng_uniform(gsl_r); - + unif=gsl_rng_uniform(gsl_r); + if (unif < 0.40 && cHyp_new.n_gamma<s_max) {flag_gamma=1;} - else if (unif>=0.40 && unif < 0.80 && + else if (unif>=0.40 && unif < 0.80 && cHyp_new.n_gamma>s_min) { flag_gamma=2; } - else if (unif>=0.80 && cHyp_new.n_gamma>0 && + else if (unif>=0.80 && cHyp_new.n_gamma>0 && cHyp_new.n_gamma<ns_test) { flag_gamma=3; } else {flag_gamma=4;} - + if(flag_gamma==1) { // Add a SNP. do { r_add=gsl_ran_discrete (gsl_r, gsl_t); - } while (mapRank2in.count(r_add)!=0); - + } while (mapRank2in.count(r_add)!=0); + double prob_total=1.0; for (size_t i=0; i<cHyp_new.n_gamma; ++i) { r=rank_new[i]; @@ -756,14 +756,14 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, // Delete a SNP. col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); r_remove=rank_new[col_id]; - + double prob_total=1.0; for (size_t i=0; i<cHyp_new.n_gamma; ++i) { r=rank_new[i]; prob_total-=p_gamma[r]; } prob_total+=p_gamma[r_remove]; - + mapRank2in.erase(r_remove); rank_new.erase(rank_new.begin()+col_id); logp+=log(p_gamma[r_remove]/prob_total)+ @@ -779,18 +779,18 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, // Be careful with the proposal. do { r_add=gsl_ran_discrete (gsl_r, gsl_t); - } while (mapRank2in.count(r_add)!=0); - + } while (mapRank2in.count(r_add)!=0); + double prob_total=1.0; for (size_t i=0; i<cHyp_new.n_gamma; ++i) { r=rank_new[i]; prob_total-=p_gamma[r]; } - + logp+=log(p_gamma[r_remove]/ (prob_total+p_gamma[r_remove]-p_gamma[r_add])); logp-=log(p_gamma[r_add]/prob_total); - + mapRank2in.erase(r_remove); mapRank2in[r_add]=1; rank_new.erase(rank_new.begin()+col_id); @@ -798,7 +798,7 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, } else {logp+=0;} // Do not change. } - + stable_sort (rank_new.begin(), rank_new.end(), comp_vec); mapRank2in.clear(); @@ -806,54 +806,54 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, } bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) { - return (a.second > b.second); + return (a.second > b.second); } // If a_mode==13 then Uty==y. -void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, +void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) { - clock_t time_start; + clock_t time_start; class HYPBSLMM cHyp_old, cHyp_new; - + gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6); - gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); - - gsl_vector *alpha_prime=gsl_vector_alloc (ni_test); + gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); + + gsl_vector *alpha_prime=gsl_vector_alloc (ni_test); gsl_vector *alpha_new=gsl_vector_alloc (ni_test); - gsl_vector *alpha_old=gsl_vector_alloc (ni_test); + gsl_vector *alpha_old=gsl_vector_alloc (ni_test); gsl_vector *Utu=gsl_vector_alloc (ni_test); gsl_vector *Utu_new=gsl_vector_alloc (ni_test); gsl_vector *Utu_old=gsl_vector_alloc (ni_test); - + gsl_vector *UtXb_new=gsl_vector_alloc (ni_test); gsl_vector *UtXb_old=gsl_vector_alloc (ni_test); - + gsl_vector *z_hat=gsl_vector_alloc (ni_test); gsl_vector *z=gsl_vector_alloc (ni_test); - gsl_vector *Utz=gsl_vector_alloc (ni_test); + gsl_vector *Utz=gsl_vector_alloc (ni_test); + + gsl_vector_memcpy (Utz, Uty); - gsl_vector_memcpy (Utz, Uty); - double logPost_new, logPost_old; double logMHratio; - double mean_z=0.0; - + double mean_z=0.0; + gsl_matrix_set_zero (Result_gamma); gsl_vector_set_zero (Utu); gsl_vector_set_zero (alpha_prime); if (a_mode==13) { pheno_mean=0.0; } - + vector<pair<double, double> > beta_g; for (size_t i=0; i<ns_test; i++) { beta_g.push_back(make_pair(0.0, 0.0)); } - + vector<size_t> rank_new, rank_old; - vector<double> beta_new, beta_old; + vector<double> beta_new, beta_old; vector<pair<size_t, double> > pos_loglr; @@ -865,59 +865,59 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, for (size_t i=0; i<ns_test; ++i) { mapRank2pos[i]=pos_loglr[i].first; } - + // Calculate proposal distribution for gamma (unnormalized), // and set up gsl_r and gsl_t. gsl_rng_env_setup(); const gsl_rng_type * gslType; - gslType = gsl_rng_default; + gslType = gsl_rng_default; if (randseed<0) { time_t rawtime; time (&rawtime); tm * ptm = gmtime (&rawtime); - + randseed = (unsigned) (ptm->tm_hour%24*3600+ ptm->tm_min*60+ptm->tm_sec); } - gsl_r = gsl_rng_alloc(gslType); + gsl_r = gsl_rng_alloc(gslType); gsl_rng_set(gsl_r, randseed); - - double *p_gamma = new double[ns_test]; + + double *p_gamma = new double[ns_test]; CalcPgamma (p_gamma); - + gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - + // Initial parameters. InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr); - + cHyp_initial=cHyp_old; - + if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, + logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); beta_old.clear(); for (size_t i=0; i<cHyp_old.n_gamma; ++i) { beta_old.push_back(0); - } + } } else { - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, + gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_old.n_gamma); gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma); - SetXgamma (UtXgamma, UtX, rank_old); - logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, + SetXgamma (UtXgamma, UtX, rank_old); + logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); - + beta_old.clear(); for (size_t i=0; i<beta->size; ++i) { beta_old.push_back(gsl_vector_get(beta, i)); - } + } gsl_matrix_free (UtXgamma); gsl_vector_free (beta); - } - + } + // Calculate centered z_hat, and pve. if (a_mode==13) { time_start=clock(); @@ -929,28 +929,28 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, } time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); } - + // Start MCMC. int accept; size_t total_step=w_step+s_step; size_t w=0, w_col, pos; size_t repeat=0; - + for (size_t t=0; t<total_step; ++t) { if (t%d_pace==0 || t==total_step-1) { ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1)); } - if (a_mode==13) { - SampleZ (y, z_hat, z); - mean_z=CenterVector (z); - + if (a_mode==13) { + SampleZ (y, z_hat, z); + mean_z=CenterVector (z); + time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz); time_UtZ+=(clock()-time_start)/ (double(CLOCKS_PER_SEC)*60.0); - + // First proposal. if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { logPost_old= @@ -959,7 +959,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, beta_old.clear(); for (size_t i=0; i<cHyp_old.n_gamma; ++i) { beta_old.push_back(0); - } + } } else { gsl_matrix *UtXgamma= @@ -971,7 +971,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); - + beta_old.clear(); for (size_t i=0; i<beta->size; ++i) { beta_old.push_back(gsl_vector_get(beta, i)); @@ -980,7 +980,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, gsl_vector_free (beta); } } - + // M-H steps. for (size_t i=0; i<n_mh; ++i) { if (gsl_rng_uniform(gsl_r)<0.33) { @@ -989,20 +989,20 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, else { repeat=1; } - + logMHratio=0.0; logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat); logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat); - + if (cHyp_new.n_gamma==0 || cHyp_new.rho==0) { logPost_new=CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new); beta_new.clear(); for (size_t i=0; i<cHyp_new.n_gamma; ++i) { beta_new.push_back(0); - } + } } else { gsl_matrix *UtXgamma= @@ -1020,17 +1020,17 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, } gsl_matrix_free (UtXgamma); gsl_vector_free (beta); - } - - logMHratio+=logPost_new-logPost_old; - + } + + logMHratio+=logPost_new-logPost_old; + if (logMHratio>0 || log(gsl_rng_uniform(gsl_r))<logMHratio) { accept=1; n_accept++; } else {accept=0;} - if (accept==1) { + if (accept==1) { logPost_old=logPost_new; rank_old.clear(); beta_old.clear(); if (rank_new.size()!=0) { @@ -1045,8 +1045,8 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, gsl_vector_memcpy (Utu_old, Utu_new); } else {cHyp_new=cHyp_old;} - } - + } + // Calculate z_hat, and pve. if (a_mode==13) { time_start=clock(); @@ -1057,21 +1057,21 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, CalcCC_PVEnZ (U, UtXb_old, Utu_old, z_hat, cHyp_old); } - + // Sample mu and update z_hat. gsl_vector_sub (z, z_hat); mean_z+=CenterVector(z); mean_z+= - gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test)); + gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test)); gsl_vector_add_constant (z_hat, mean_z); - + time_UtZ+=(clock()-time_start)/ (double(CLOCKS_PER_SEC)*60.0); } - + // Save data. if (t<w_step) {continue;} - else { + else { if (t%r_pace==0) { w_col=w%w_pace; if (w_col==0) { @@ -1086,76 +1086,76 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, gsl_matrix_set_zero (Result_gamma); } } - + gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h); gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve); gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho); gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge); gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp); gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma); - + for (size_t i=0; i<cHyp_old.n_gamma; ++i) { pos=mapRank2pos[rank_old[i]]+1; gsl_matrix_set(Result_gamma,w_col,i, pos); - + beta_g[pos-1].first+=beta_old[i]; - beta_g[pos-1].second+=1.0; + beta_g[pos-1].second+=1.0; } - + gsl_vector_add (alpha_prime, alpha_old); gsl_vector_add (Utu, Utu_old); - + if (a_mode==13) { pheno_mean+=mean_z; } - + w++; - + } - + } } cout<<endl; - + w_col=w%w_pace; - WriteResult (1, Result_hyp, Result_gamma, w_col); - + WriteResult (1, Result_hyp, Result_gamma, w_col); + gsl_matrix_free(Result_hyp); - gsl_matrix_free(Result_gamma); - + gsl_matrix_free(Result_gamma); + gsl_vector_free(z_hat); gsl_vector_free(z); - gsl_vector_free(Utz); - gsl_vector_free(UtXb_new); + gsl_vector_free(Utz); + gsl_vector_free(UtXb_new); gsl_vector_free(UtXb_old); - gsl_vector_free(alpha_new); + gsl_vector_free(alpha_new); gsl_vector_free(alpha_old); - gsl_vector_free(Utu_new); - gsl_vector_free(Utu_old); - - gsl_vector_scale (alpha_prime, 1.0/(double)w); - gsl_vector_scale (Utu, 1.0/(double)w); + gsl_vector_free(Utu_new); + gsl_vector_free(Utu_old); + + gsl_vector_scale (alpha_prime, 1.0/(double)w); + gsl_vector_scale (Utu, 1.0/(double)w); if (a_mode==13) { pheno_mean/=(double)w; } - + gsl_vector *alpha=gsl_vector_alloc (ns_test); gsl_blas_dgemv (CblasTrans, 1.0/(double)ns_test, UtX, - alpha_prime, 0.0, alpha); + alpha_prime, 0.0, alpha); WriteParam (beta_g, alpha, w); gsl_vector_free(alpha); - + gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime); - WriteBV(alpha_prime); - - gsl_vector_free(alpha_prime); - gsl_vector_free(Utu); - + WriteBV(alpha_prime); + + gsl_vector_free(alpha_prime); + gsl_vector_free(Utu); + delete [] p_gamma; beta_g.clear(); - + return; } @@ -1169,9 +1169,9 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, gsl_vector_memcpy (H_eval, eval); gsl_vector_scale (H_eval, lambda); gsl_vector_add_constant (H_eval, 1.0); - + gsl_vector_memcpy (bv, Uty); - gsl_vector_div (bv, H_eval); + gsl_vector_div (bv, H_eval); gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2, UtX, bv, 0.0, beta); @@ -1181,18 +1181,18 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, WriteParam (beta); WriteBV(bv); - + gsl_vector_free (H_eval); gsl_vector_free (beta); gsl_vector_free (bv); - + return; } // Below fits MCMC for rho=1. void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { - time_t time_start=clock(); + time_t time_start=clock(); gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size); gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); @@ -1271,7 +1271,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, for (size_t i=0; i<rank_union.size(); i++) { if (mapRank2in_remove.count(rank_old[i_old])!=0) {i_old++; continue;} - gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); + gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); gsl_vector_const_view Xcopy_col=gsl_matrix_const_column(X_old, i_old); gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); @@ -1290,7 +1290,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, j_old++; j_new++; } i_old++; i_new++; - } + } } else { gsl_matrix *X_add=gsl_matrix_alloc(X_old->size1, rank_add.size() ); gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2); @@ -1302,7 +1302,7 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, // Get t(X_add)X_add and t(X_add)X_temp. clock_t time_start=clock(); - + // Somehow the lapack_dgemm does not work here. gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); @@ -1325,15 +1325,15 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, i_flag=0; } - gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); + gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); if (i_flag==1) { gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add); gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); } else { gsl_vector_const_view Xcopy_col= - gsl_matrix_const_column(X_old, i_old); + gsl_matrix_const_column(X_old, i_old); gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); - } + } if (i_flag==1) { d=gsl_vector_get (Xty_add, i_add); @@ -1385,34 +1385,34 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, rank_union.clear(); mapRank2in_remove.clear(); mapRank2in_add.clear(); - + return; } -double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) { +double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) { double logpost=0.0; - + // For quantitative traits, calculate pve and pge. // Pve and pge for case/control data are calculted in CalcCC_PVEnZ. if (a_mode==11) { cHyp.pve=0.0; - cHyp.pge=1.0; + cHyp.pge=1.0; } // Calculate likelihood. if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);} else {logpost-=0.5*yty;} - + logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); - + return logpost; } double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, - gsl_vector *beta, class HYPBSLMM &cHyp) { + gsl_vector *beta, class HYPBSLMM &cHyp) { double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); double logpost=0.0; double d, P_yy=yty, logdet_O=0.0; @@ -1423,10 +1423,10 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size); gsl_vector_const_view Xty_sub= gsl_vector_const_subvector (Xty, 0, s_size); - + gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size); gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size); - gsl_vector *beta_hat=gsl_vector_alloc (s_size); + gsl_vector *beta_hat=gsl_vector_alloc (s_size); gsl_vector *Xty_temp=gsl_vector_alloc (s_size); gsl_vector_memcpy (Xty_temp, &Xty_sub.vector); @@ -1436,9 +1436,9 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, gsl_matrix_scale (Omega, sigma_a2); gsl_matrix_set_identity (M_temp); gsl_matrix_add (Omega, M_temp); - + // Calculate beta_hat. - logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat); + logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat); gsl_vector_scale (beta_hat, sigma_a2); gsl_blas_ddot (Xty_temp, beta_hat, &d); @@ -1453,27 +1453,27 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, // Sample beta. for (size_t i=0; i<s_size; i++) { - d=gsl_ran_gaussian(gsl_r, 1); - gsl_vector_set(beta, i, d); + d=gsl_ran_gaussian(gsl_r, 1); + gsl_vector_set(beta, i, d); } gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size); gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, - &beta_sub.vector); - + &beta_sub.vector); + // This computes inv(L^T(Omega)) %*% beta. gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau)); - gsl_vector_add(&beta_sub.vector, beta_hat); + gsl_vector_add(&beta_sub.vector, beta_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, - &beta_sub.vector, 0.0, Xb); - + &beta_sub.vector, 0.0, Xb); + // For quantitative traits, calculate pve and pge. if (a_mode==11) { gsl_blas_ddot (Xb, Xb, &d); cHyp.pve=d/(double)ni_test; cHyp.pve/=cHyp.pve+1.0/tau; - cHyp.pge=1.0; - } - + cHyp.pge=1.0; + } + logpost=-0.5*logdet_O; if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} else {logpost-=0.5*P_yy;} @@ -1490,11 +1490,11 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) +void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) { gsl_vector_set_zero(z_hat); cHyp.pve=0.0; - cHyp.pge=1.0; + cHyp.pge=1.0; return; } @@ -1502,12 +1502,12 @@ void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; - + gsl_blas_ddot (Xb, Xb, &d); cHyp.pve=d/(double)ni_test; cHyp.pve/=cHyp.pve+1.0; cHyp.pge=1.0; - + gsl_vector_memcpy (z_hat, Xb); return; @@ -1515,16 +1515,16 @@ void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, // If a_mode==13, then run probit model. void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { - clock_t time_start; + clock_t time_start; double time_set=0, time_post=0; class HYPBSLMM cHyp_old, cHyp_new; - + gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6); - gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); - + gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); + gsl_vector *Xb_new=gsl_vector_alloc (ni_test); - gsl_vector *Xb_old=gsl_vector_alloc (ni_test); + gsl_vector *Xb_old=gsl_vector_alloc (ni_test); gsl_vector *z_hat=gsl_vector_alloc (ni_test); gsl_vector *z=gsl_vector_alloc (ni_test); @@ -1540,28 +1540,28 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { double ztz=0.0; gsl_vector_memcpy (z, y); - + // For quantitative traits, y is centered already in // gemma.cpp, but just in case. - double mean_z=CenterVector (z); + double mean_z=CenterVector (z); gsl_blas_ddot(z, z, &ztz); double logPost_new, logPost_old; double logMHratio; - + gsl_matrix_set_zero (Result_gamma); if (a_mode==13) { pheno_mean=0.0; } - + vector<pair<double, double> > beta_g; for (size_t i=0; i<ns_test; i++) { beta_g.push_back(make_pair(0.0, 0.0)); } - + vector<size_t> rank_new, rank_old; vector<pair<size_t, double> > pos_loglr; - + time_start=clock(); MatrixCalcLmLR (X, z, pos_loglr); time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); @@ -1570,44 +1570,44 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { for (size_t i=0; i<ns_test; ++i) { mapRank2pos[i]=pos_loglr[i].first; } - + // Calculate proposal distribution for gamma (unnormalized), // and set up gsl_r and gsl_t. - gsl_rng_env_setup(); + gsl_rng_env_setup(); const gsl_rng_type * gslType; - gslType = gsl_rng_default; + gslType = gsl_rng_default; if (randseed<0) { time_t rawtime; time (&rawtime); tm * ptm = gmtime (&rawtime); - + randseed = (unsigned) (ptm->tm_hour%24*3600+ ptm->tm_min*60+ptm->tm_sec); } - gsl_r = gsl_rng_alloc(gslType); + gsl_r = gsl_rng_alloc(gslType); gsl_rng_set(gsl_r, randseed); - - double *p_gamma = new double[ns_test]; + + double *p_gamma = new double[ns_test]; CalcPgamma (p_gamma); - + gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - + // Initial parameters. InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr); - + cHyp_initial=cHyp_old; - if (cHyp_old.n_gamma==0) { + if (cHyp_old.n_gamma==0) { logPost_old=CalcPosterior (ztz, cHyp_old); } - else { - SetXgamma (Xgamma_old, X, rank_old); + else { + SetXgamma (Xgamma_old, X, rank_old); CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), Xb_old, beta_old, cHyp_old); - } + } // Calculate centered z_hat, and pve. if (a_mode==13) { @@ -1618,28 +1618,28 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old); } } - + // Start MCMC. int accept; size_t total_step=w_step+s_step; size_t w=0, w_col, pos; size_t repeat=0; - + for (size_t t=0; t<total_step; ++t) { if (t%d_pace==0 || t==total_step-1) { ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1)); } - if (a_mode==13) { - SampleZ (y, z_hat, z); + if (a_mode==13) { + SampleZ (y, z_hat, z); mean_z=CenterVector (z); gsl_blas_ddot(z,z,&ztz); - + // First proposal. - if (cHyp_old.n_gamma==0) { + if (cHyp_old.n_gamma==0) { logPost_old=CalcPosterior (ztz, cHyp_old); - } else { + } else { gsl_matrix_view Xold_sub= gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size()); @@ -1651,7 +1651,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), Xb_old, beta_old, cHyp_old); - } + } } // M-H steps. @@ -1663,23 +1663,23 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { logMHratio=0.0; logMHratio+= - ProposeHnRho(cHyp_old, cHyp_new, repeat); + ProposeHnRho(cHyp_old, cHyp_new, repeat); logMHratio+= ProposeGamma (rank_old, rank_new, p_gamma, - cHyp_old, cHyp_new, repeat); + cHyp_old, cHyp_new, repeat); logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat); - + if (cHyp_new.n_gamma==0) { logPost_new=CalcPosterior (ztz, cHyp_new); } else { - + // This makes sure that rank_old.size() == // rank_remove.size() does not happen. if (cHyp_new.n_gamma<=20 || cHyp_old.n_gamma<=20) { time_start=clock(); - SetXgamma (Xgamma_new, X, rank_new); + SetXgamma (Xgamma_new, X, rank_new); CalcXtX (Xgamma_new, z, rank_new.size(), - XtX_new, Xtz_new); + XtX_new, Xtz_new); time_set+=(clock()-time_start)/ (double(CLOCKS_PER_SEC)*60.0); } else { @@ -1697,17 +1697,17 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { cHyp_new); time_post+=(clock()-time_start)/ (double(CLOCKS_PER_SEC)*60.0); - } - logMHratio+=logPost_new-logPost_old; - + } + logMHratio+=logPost_new-logPost_old; + if (logMHratio>0 || log(gsl_rng_uniform(gsl_r))<logMHratio) { accept=1; n_accept++; } else {accept=0;} - - if (accept==1) { + + if (accept==1) { logPost_old=logPost_new; cHyp_old=cHyp_new; gsl_vector_memcpy (Xb_old, Xb_new); @@ -1719,7 +1719,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { ++i) { rank_old.push_back(rank_new[i]); } - + gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size()); gsl_matrix_view XtXold_sub=gsl_matrix_submatrix(XtX_old, 0, 0, rank_new.size(), rank_new.size()); gsl_vector_view Xtzold_sub=gsl_vector_subvector(Xtz_old, 0, rank_new.size()); @@ -1742,8 +1742,8 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { } else { cHyp_new=cHyp_old; } - - } + + } // Calculate z_hat, and pve. if (a_mode==13) { @@ -1753,19 +1753,19 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { else { CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old); } - + // Sample mu and update z_hat. gsl_vector_sub (z, z_hat); mean_z+=CenterVector(z); mean_z+=gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test)); - + gsl_vector_add_constant (z_hat, mean_z); } - + // Save data. if (t<w_step) {continue;} - else { + else { if (t%r_pace==0) { w_col=w%w_pace; if (w_col==0) { @@ -1793,21 +1793,21 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { cHyp_old.logp); gsl_matrix_set(Result_hyp,w_col,5, cHyp_old.n_gamma); - + for (size_t i=0; i<cHyp_old.n_gamma; ++i) { pos=mapRank2pos[rank_old[i]]+1; gsl_matrix_set(Result_gamma,w_col, i,pos); - + beta_g[pos-1].first+= gsl_vector_get(beta_old, i); - beta_g[pos-1].second+=1.0; + beta_g[pos-1].second+=1.0; } - + if (a_mode==13) { pheno_mean+=mean_z; } - + w++; } } @@ -1818,19 +1818,19 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { cout<<"time on calculating posterior: "<<time_post<<endl; w_col=w%w_pace; - WriteResult (1, Result_hyp, Result_gamma, w_col); - + WriteResult (1, Result_hyp, Result_gamma, w_col); + gsl_vector *alpha=gsl_vector_alloc (ns_test); gsl_vector_set_zero (alpha); WriteParam (beta_g, alpha, w); gsl_vector_free(alpha); gsl_matrix_free(Result_hyp); - gsl_matrix_free(Result_gamma); - + gsl_matrix_free(Result_gamma); + gsl_vector_free(z_hat); gsl_vector_free(z); - gsl_vector_free(Xb_new); + gsl_vector_free(Xb_new); gsl_vector_free(Xb_old); gsl_matrix_free(Xgamma_old); @@ -1842,9 +1842,9 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { gsl_matrix_free(XtX_new); gsl_vector_free(Xtz_new); gsl_vector_free(beta_new); - + delete [] p_gamma; beta_g.clear(); - + return; } diff --git a/src/bslmm.h b/src/bslmm.h index da185fa..c7768a2 100644 --- a/src/bslmm.h +++ b/src/bslmm.h @@ -1,22 +1,22 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __BSLMM_H__ +#ifndef __BSLMM_H__ #define __BSLMM_H__ #include <vector> @@ -30,23 +30,23 @@ using namespace std; class BSLMM { -public: +public: // IO-related parameters. - int a_mode; + int a_mode; size_t d_pace; - + string file_bfile; string file_geno; string file_out; string path_out; - + // LMM-related parameters. double l_min; double l_max; size_t n_region; double pve_null; double pheno_mean; - + // BSLMM MCMC-related parameters double h_min, h_max, h_scale; // Priors for h. double rho_min, rho_max, rho_scale; // Priors for rho. @@ -61,8 +61,8 @@ public: size_t n_mh; // Number of MH steps per iter. double geo_mean; // Mean of geometric dist. long int randseed; - double trace_G; - + double trace_G; + HYPBSLMM cHyp_initial; // Summary statistics. @@ -74,32 +74,32 @@ public: // Time spent on constructing the proposal distribution for // gamma (i.e. lmm or lm analysis). - double time_Proposal; + double time_Proposal; // Indicator for individuals (phenotypes): 0 missing, 1 // available for analysis. - vector<int> indicator_idv; + vector<int> indicator_idv; // Sequence indicator for SNPs: 0 ignored because of (a) maf, // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; + vector<int> indicator_snp; // Record SNP information. - vector<SNPINFO> snpInfo; - + vector<SNPINFO> snpInfo; + // Not included in PARAM. - gsl_rng *gsl_r; - gsl_ran_discrete_t *gsl_t; - map<size_t, size_t> mapRank2pos; - + gsl_rng *gsl_r; + gsl_ran_discrete_t *gsl_t; + map<size_t, size_t> mapRank2pos; + // Main functions. void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); - + void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *eval, const double lambda); - + void MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y); @@ -111,10 +111,10 @@ public: void WriteParam (const gsl_vector *alpha); void WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl_matrix *Result_gamma, const size_t w_col); - + // Subfunctions inside MCMC. void CalcPgamma (double *p_gammar); - + double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const double sigma_a2); void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp index ebbff70..e1a53a6 100644 --- a/src/bslmmdap.cpp +++ b/src/bslmmdap.cpp @@ -92,13 +92,13 @@ void BSLMMDAP::CopyToParam (PARAM &cPar) { // Read hyp file. -void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, +void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab) { vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear(); igzstream infile (file_hyp.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open hyp file: "<<file_hyp<<endl; + cout<<"error! fail to open hyp file: "<<file_hyp<<endl; return; } @@ -128,7 +128,7 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, } // Read bf file. -void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, +void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF) { BF.clear(); vec_rs.clear(); @@ -196,12 +196,12 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, // Read category files. // Read both continuous and discrete category file, record mapRS2catc. -void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, +void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd) { igzstream infile (file_cat.c_str(), igzstream::in); if (!infile) { - cout<<"error! fail to open category file: "<<file_cat<<endl; + cout<<"error! fail to open category file: "<<file_cat<<endl; return; } @@ -323,11 +323,11 @@ void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) { outfile_hyp.open (file_hyp.c_str(), ofstream::out); if (!outfile_bf) { - cout<<"error writing file: "<<file_bf<<endl; + cout<<"error writing file: "<<file_bf<<endl; return; } if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; + cout<<"error writing file: "<<file_hyp<<endl; return; } @@ -370,8 +370,8 @@ void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) { return; } -void BSLMMDAP::WriteResult (const vector<string> &vec_rs, - const gsl_matrix *Hyper, const gsl_vector *pip, +void BSLMMDAP::WriteResult (const vector<string> &vec_rs, + const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef) { string file_gamma, file_hyp, file_coef; file_gamma=path_out+"/"+file_out; @@ -388,15 +388,15 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs, outfile_coef.open (file_coef.c_str(), ofstream::out); if (!outfile_gamma) { - cout<<"error writing file: "<<file_gamma<<endl; + cout<<"error writing file: "<<file_gamma<<endl; return; } if (!outfile_hyp) { - cout<<"error writing file: "<<file_hyp<<endl; + cout<<"error writing file: "<<file_hyp<<endl; return; } if (!outfile_coef) { - cout<<"error writing file: "<<file_coef<<endl; + cout<<"error writing file: "<<file_coef<<endl; return; } @@ -432,8 +432,8 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs, } -double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, - const gsl_vector *K_eval, +double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, + const gsl_vector *K_eval, const double sigma_b2, const double tau) { gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size); @@ -457,16 +457,16 @@ double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, return logm; } -double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, - const gsl_vector *K_eval, - const double sigma_a2, +double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, + const gsl_vector *K_eval, + const double sigma_a2, const double sigma_b2, const double tau) { clock_t time_start; double logm=0.0; double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0; - gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, + gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, UtXgamma->size2); gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); @@ -526,8 +526,8 @@ double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) { } // Where A is the ni_test by n_cat matrix of annotations. -void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, +void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) { clock_t time_start; @@ -601,9 +601,9 @@ void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, return; } -void single_ct_regression(const gsl_matrix_int *Xd, +void single_ct_regression(const gsl_matrix_int *Xd, const gsl_vector_int *dlevel, - const gsl_vector *pip_vec, + const gsl_vector *pip_vec, gsl_vector *coef, gsl_vector *prior_vec) { map<int,double> sum_pip; @@ -635,13 +635,13 @@ void single_ct_regression(const gsl_matrix_int *Xd, } // Where A is the ni_test by n_cat matrix of annotations. -void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, - const vector<string> &vec_rs, - const vector<double> &vec_sa2, - const vector<double> &vec_sb2, - const vector<double> &wab, - const vector<vector<vector<double> > > &BF, - gsl_matrix *Ac, gsl_matrix_int *Ad, +void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, + const vector<string> &vec_rs, + const vector<double> &vec_sa2, + const vector<double> &vec_sb2, + const vector<double> &wab, + const vector<vector<vector<double> > > &BF, + gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) { clock_t time_start; diff --git a/src/bslmmdap.h b/src/bslmmdap.h index 8445669..db5774b 100644 --- a/src/bslmmdap.h +++ b/src/bslmmdap.h @@ -78,35 +78,35 @@ public: void CopyToParam (PARAM &cPar); void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF); - void WriteResult (const vector<string> &vec_rs, - const gsl_matrix *Hyper, const gsl_vector *pip, + void WriteResult (const vector<string> &vec_rs, + const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef); - double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, + double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_b2, const double tau); - double CalcMarginal (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, const gsl_vector *K_eval, - const double sigma_a2, const double sigma_b2, + double CalcMarginal (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, const gsl_vector *K_eval, + const double sigma_a2, const double sigma_b2, const double tau); double CalcPrior (class HYPBSLMM &cHyp); - void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, + void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y); - void DAP_EstimateHyper (const size_t kc, const size_t kd, - const vector<string> &vec_rs, - const vector<double> &vec_sa2, - const vector<double> &vec_sb2, - const vector<double> &wab, - const vector<vector<vector<double> > > &BF, - gsl_matrix *Ac, gsl_matrix_int *Ad, + void DAP_EstimateHyper (const size_t kc, const size_t kd, + const vector<string> &vec_rs, + const vector<double> &vec_sa2, + const vector<double> &vec_sb2, + const vector<double> &wab, + const vector<vector<vector<double> > > &BF, + gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel); }; -void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, +void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab); -void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, +void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF); -void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, +void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd); diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp index 7ad250f..733dae1 100644 --- a/src/eigenlib.cpp +++ b/src/eigenlib.cpp @@ -28,9 +28,9 @@ using namespace std; using namespace Eigen; // On two different clusters, compare eigen vs lapack/gsl: -// +// // dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl -// dgemv, 20x or 4x faster than gsl, +// dgemv, 20x or 4x faster than gsl, // eigen, 1x or 0.3x slower than lapack // invert, 20x or 10x faster than lapack // diff --git a/src/eigenlib.h b/src/eigenlib.h index 8cb8880..3659dc1 100644 --- a/src/eigenlib.h +++ b/src/eigenlib.h @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __EIGENLIB_H__ +#ifndef __EIGENLIB_H__ #define __EIGENLIB_H__ #include <vector> diff --git a/src/gemma.cpp b/src/gemma.cpp index 18179f3..f4902fe 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -31,7 +31,7 @@ #include "gsl/gsl_eigen.h" #include "gsl/gsl_cdf.h" -#include "lapack.h" +#include "lapack.h" #include "io.h" #include "gemma.h" #include "vc.h" @@ -211,7 +211,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" ./gemma -g [filename] -p [filename] -calccor -o [prefix]"<<endl; cout<<endl; } - + if (option==2) { cout<<" FILE I/O RELATED OPTIONS" << endl; cout<<" -bfile [prefix] "<<" specify input PLINK binary ped file prefix."<<endl; @@ -231,11 +231,11 @@ void GEMMA::PrintHelp(size_t option) { cout<<" format: rs#1, base_position, chr_number"<<endl; cout<<" rs#2, base_position, chr_number"<<endl; cout<<" ..."<<endl; - + // WJA added. cout<<" -oxford [prefix] "<<" specify input Oxford genotype bgen file prefix."<<endl; cout<<" requires: *.bgen, *.sample files"<<endl; - + cout<<" -gxe [filename] "<<" specify input file that contains a column of environmental factor for g by e tests"<<endl; cout<<" format: variable for individual 1"<<endl; cout<<" variable for individual 2"<<endl; @@ -308,7 +308,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" -notsnp "<<" minor allele frequency cutoff is not used" << endl; cout<<endl; } - + if (option==4) { cout<<" RELATEDNESS MATRIX CALCULATION OPTIONS" << endl; cout<<" -gk [num] "<<" specify which type of kinship/relatedness matrix to generate (default 1)" << endl; @@ -317,13 +317,13 @@ void GEMMA::PrintHelp(size_t option) { cout<<" note: non-polymorphic SNPs are excluded "<<endl; cout<<endl; } - + if (option==5) { cout<<" EIGEN-DECOMPOSITION OPTIONS" << endl; cout<<" -eigen "<<" specify to perform eigen decomposition of the loaded relatedness matrix" << endl; cout<<endl; } - + if (option==6) { cout<<" VARIANCE COMPONENT ESTIMATION OPTIONS" << endl; cout<<" -vc "<<" specify to perform variance component estimation for the loaded relatedness matrix/matrices" << endl; @@ -336,7 +336,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" -crt -windowns [num]"<<" specify the window size based on number of snps (default 0)"<<endl; cout<<endl; } - + if (option==7) { cout<<" LINEAR MODEL OPTIONS" << endl; cout<<" -lm [num] "<<" specify analysis options (default 1)."<<endl; @@ -346,7 +346,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" 4: 1-3"<<endl; cout<<endl; } - + if (option==8) { cout<<" LINEAR MIXED MODEL OPTIONS" << endl; cout<<" -lmm [num] "<<" specify analysis options (default 1)."<<endl; @@ -360,7 +360,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" -region [num] "<<" specify the number of regions used to evaluate lambda (default 10)" << endl; cout<<endl; } - + if (option==9) { cout<<" MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; cout<<" -pnr "<<" specify the pvalue threshold to use the Newton-Raphson's method (default 0.001)"<<endl; @@ -371,7 +371,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" -crt "<<" specify to output corrected pvalues for these pvalues that are below the -pnr threshold"<<endl; cout<<endl; } - + if (option==10) { cout<<" MULTI-LOCUS ANALYSIS OPTIONS" << endl; cout<<" -bslmm [num] "<<" specify analysis options (default 1)."<<endl; @@ -380,10 +380,10 @@ void GEMMA::PrintHelp(size_t option) { cout<<" 3: probit BSLMM (requires 0/1 phenotypes)"<<endl; cout<<" 4: BSLMM with DAP for Hyper Parameter Estimation"<<endl; cout<<" 5: BSLMM with DAP for Fine Mapping"<<endl; - + cout<<" -ldr [num] "<<" specify analysis options (default 1)."<<endl; cout<<" options: 1: LDR"<<endl; - + cout<<" MCMC OPTIONS" << endl; cout<<" Prior" << endl; cout<<" -hmin [num] "<<" specify minimum value for h (default 0)" << endl; @@ -394,13 +394,13 @@ void GEMMA::PrintHelp(size_t option) { cout<<" -pmax [num] "<<" specify maximum value for log10(pi) (default log10(1) )" << endl; cout<<" -smin [num] "<<" specify minimum value for |gamma| (default 0)" << endl; cout<<" -smax [num] "<<" specify maximum value for |gamma| (default 300)" << endl; - + cout<<" Proposal" << endl; cout<<" -gmean [num] "<<" specify the mean for the geometric distribution (default: 2000)" << endl; cout<<" -hscale [num] "<<" specify the step size scale for the proposal distribution of h (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; cout<<" -rscale [num] "<<" specify the step size scale for the proposal distribution of rho (value between 0 and 1, default min(10/sqrt(n),1) )" << endl; cout<<" -pscale [num] "<<" specify the step size scale for the proposal distribution of log10(pi) (value between 0 and 1, default min(5/sqrt(n),1) )" << endl; - + cout<<" Others" << endl; cout<<" -w [num] "<<" specify burn-in steps (default 100,000)" << endl; cout<<" -s [num] "<<" specify sampling steps (default 1,000,000)" << endl; @@ -411,7 +411,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" requires: 0/1 phenotypes and -bslmm 3 option"<<endl; cout<<endl; } - + if (option==11) { cout<<" PREDICTION OPTIONS" << endl; cout<<" -predict [num] "<<" specify prediction options (default 1)."<<endl; @@ -419,7 +419,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" 2: predict for individuals with missing phenotypes, and convert the predicted values to probability scale. Use only for files fitted with -bslmm 3 option"<<endl; cout<<endl; } - + if (option==12) { cout<<" CALC CORRELATION OPTIONS" << endl; cout<<" -calccor "<<endl; @@ -428,7 +428,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" -windowns [num] "<<" specify the window size based on number of snps (default 0; not used)" << endl; cout<<endl; } - + if (option==13) { cout<<" NOTE"<<endl; cout<<" 1. Only individuals with non-missing phenotoypes and covariates will be analyzed."<<endl; @@ -438,7 +438,7 @@ void GEMMA::PrintHelp(size_t option) { cout<<" 5. For bslmm analysis, in addition to 3, memory should be large enough to hold the whole genotype matrix."<<endl; cout<<endl; } - + return; } @@ -522,7 +522,7 @@ void GEMMA::Assign(int argc, char ** argv, PARAM &cPar) { str.assign(argv[i]); cPar.file_anno=str; } - + // WJA added. else if (strcmp(argv[i], "-oxford")==0 || strcmp(argv[i], "--oxford")==0 || @@ -1262,7 +1262,7 @@ void GEMMA::BatchRun (PARAM &cPar) { gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph); gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt); - + //set covariates matrix W and phenotype matrix Y //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); @@ -1545,7 +1545,7 @@ void GEMMA::BatchRun (PARAM &cPar) { cVarcov.CopyToParam(cPar); } - + // LM. if (cPar.a_mode==51 || cPar.a_mode==52 || cPar.a_mode==53 || cPar.a_mode==54) { //Fit LM gsl_matrix *Y=gsl_matrix_alloc (cPar.ni_test, cPar.n_ph); @@ -2581,7 +2581,7 @@ void GEMMA::BatchRun (PARAM &cPar) { } else { kc=0; kd=0; } - + cout<<"## number of blocks = "<<BF.size()<<endl; cout<<"## number of analyzed SNPs = "<<vec_rs.size()<<endl; cout<<"## grid size for hyperparameters = "<<wab.size()<<endl; diff --git a/src/gemma.h b/src/gemma.h index 8393470..78828ef 100644 --- a/src/gemma.h +++ b/src/gemma.h @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __GEMMA_H__ +#ifndef __GEMMA_H__ #define __GEMMA_H__ #include "param.h" @@ -25,15 +25,15 @@ using namespace std; class GEMMA { -public: +public: // Parameters. string version; string date; string year; - + // Constructor. GEMMA(void); - + // Functions. void PrintHeader (void); void PrintHelp (size_t option); diff --git a/src/gzstream.cpp b/src/gzstream.cpp index bbb4ba8..688b625 100644 --- a/src/gzstream.cpp +++ b/src/gzstream.cpp @@ -21,8 +21,8 @@ // Revision : $Revision: 1.7 $ // Revision_date : $Date: 2003/01/08 14:41:27 $ // Author(s) : Deepak Bandyopadhyay, Lutz Kettner -// -// Standard streambuf implementation following Nicolai Josuttis, "The +// +// Standard streambuf implementation following Nicolai Josuttis, "The // Standard C++ Library". // ============================================================================ @@ -97,7 +97,7 @@ int gzstreambuf::underflow() { // used for input buffer only buffer + 4 + num); // end of buffer // return next character - return * reinterpret_cast<unsigned char *>( gptr()); + return * reinterpret_cast<unsigned char *>( gptr()); } int gzstreambuf::flush_buffer() { diff --git a/src/gzstream.h b/src/gzstream.h index 4e86ad9..241ff76 100644 --- a/src/gzstream.h +++ b/src/gzstream.h @@ -21,8 +21,8 @@ // Revision : $Revision: 1.5 $ // Revision_date : $Date: 2002/04/26 23:30:15 $ // Author(s) : Deepak Bandyopadhyay, Lutz Kettner -// -// Standard streambuf implementation following Nicolai Josuttis, "The +// +// Standard streambuf implementation following Nicolai Josuttis, "The // Standard C++ Library". // ============================================================================ @@ -58,14 +58,14 @@ public: setp( buffer, buffer + (bufferSize-1)); setg( buffer + 4, // beginning of putback area buffer + 4, // read position - buffer + 4); // end position + buffer + 4); // end position // ASSERT: both input & output capabilities will not be used together } int is_open() { return opened; } gzstreambuf* open( const char* name, int open_mode); gzstreambuf* close(); ~gzstreambuf() { close(); } - + virtual int overflow( int c = EOF); virtual int underflow(); virtual int sync(); @@ -85,15 +85,15 @@ public: // ---------------------------------------------------------------------------- // User classes. Use igzstream and ogzstream analogously to ifstream and -// ofstream respectively. They read and write files based on the gz* +// ofstream respectively. They read and write files based on the gz* // function interface of the zlib. Files are compatible with gzip compression. // ---------------------------------------------------------------------------- class igzstream : public gzstreambase, public std::istream { public: - igzstream() : std::istream( &buf) {} + igzstream() : std::istream( &buf) {} igzstream( const char* name, int open_mode = std::ios::in) - : gzstreambase( name, open_mode), std::istream( &buf) {} + : gzstreambase( name, open_mode), std::istream( &buf) {} gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } void open( const char* name, int open_mode = std::ios::in) { gzstreambase::open( name, open_mode); @@ -104,7 +104,7 @@ class ogzstream : public gzstreambase, public std::ostream { public: ogzstream() : std::ostream( &buf) {} ogzstream( const char* name, int mode = std::ios::out) - : gzstreambase( name, mode), std::ostream( &buf) {} + : gzstreambase( name, mode), std::ostream( &buf) {} gzstreambuf* rdbuf() { return gzstreambase::rdbuf(); } void open( const char* name, int open_mode = std::ios::out) { gzstreambase::open( name, open_mode); diff --git a/src/lapack.h b/src/lapack.h index 88fc509..5e1db35 100644 --- a/src/lapack.h +++ b/src/lapack.h @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __LAPACK_H__ +#ifndef __LAPACK_H__ #define __LAPACK_H__ #include <vector> diff --git a/src/ldr.cpp b/src/ldr.cpp index a1e5791..f0a1b37 100644 --- a/src/ldr.cpp +++ b/src/ldr.cpp @@ -1,17 +1,17 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ @@ -24,7 +24,7 @@ #include <cmath> #include <iostream> #include <stdio.h> -#include <stdlib.h> +#include <stdlib.h> #include <ctime> #include <cstring> #include <algorithm> @@ -51,7 +51,7 @@ using namespace Eigen; void LDR::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; - + file_bfile=cPar.file_bfile; file_geno=cPar.file_geno; file_out=cPar.file_out; @@ -62,11 +62,11 @@ void LDR::CopyFromParam (PARAM &cPar) { ni_test=cPar.ni_test; ns_test=cPar.ns_test; n_cvt=cPar.n_cvt; - + indicator_idv=cPar.indicator_idv; indicator_snp=cPar.indicator_snp; snpInfo=cPar.snpInfo; - + return; } @@ -77,7 +77,7 @@ void LDR::CopyToParam (PARAM &cPar) { //X is a p by n matrix. void LDR::VB (const vector<vector<unsigned char> > &Xt, const gsl_matrix *W_gsl, const gsl_vector *y_gsl) { - + // Save gsl_vector and gsl_matrix into Eigen library formats. MatrixXd W(W_gsl->size1, W_gsl->size2); VectorXd y(y_gsl->size); @@ -105,6 +105,6 @@ void LDR::VB (const vector<vector<unsigned char> > &Xt, // Save results. // TO DO. - + return; } @@ -1,22 +1,22 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __LDR_H__ +#ifndef __LDR_H__ #define __LDR_H__ #include <vector> @@ -29,16 +29,16 @@ using namespace std; class LDR { -public: +public: // IO-related parameters. - int a_mode; + int a_mode; size_t d_pace; - + string file_bfile; string file_geno; string file_out; string path_out; - + // Summary statistics. size_t ni_total, ns_total; // Total number of individuals & SNPs. size_t ni_test, ns_test; // Number of individuals & SNPs used @@ -52,16 +52,16 @@ public: // Sequence indicator for SNPs: 0 ignored because of (a) maf, // (b) miss, (c) non-poly; 1 available for analysis. vector<int> indicator_snp; - + vector<SNPINFO> snpInfo; // Record SNP information. - + // Not included in PARAM. - gsl_rng *gsl_r; - + gsl_rng *gsl_r; + // Main functions. void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); - + void VB(const vector<vector<unsigned char> > &Xt, const gsl_matrix *W_gsl, const gsl_vector *y_gsl); }; @@ -412,13 +412,13 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) { string chr; std::cout << "Warning: WJA hard coded SNP missingness " << "threshold of 10%" << std::endl; - + // Start reading genotypes and analyze. for (size_t t=0; t<indicator_snp.size(); ++t) { if (t%d_pace==0 || t==(ns_total-1)) { ProgressBar ("Reading SNPs ", t, ns_total-1); } - + // Read SNP header. id.clear(); rs.clear(); @@ -500,7 +500,7 @@ void LM::Analyzebgen (const gsl_matrix *W, const gsl_vector *y) { static_cast<double>(unzipped_data[i*3+1])/32768.0; bgen_geno_prob_BB= static_cast<double>(unzipped_data[i*3+2])/32768.0; - + // WJA bgen_geno_prob_non_miss= bgen_geno_prob_AA + @@ -54,7 +54,7 @@ public: // Sequence indicator for SNPs: 0 ignored because of (a) maf, // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; + vector<int> indicator_snp; vector<SNPINFO> snpInfo; // Record SNP information. diff --git a/src/lmm.cpp b/src/lmm.cpp index 73a9232..2b5ca84 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -55,7 +55,7 @@ void LMM::CopyFromParam (PARAM &cPar) { file_out=cPar.file_out; path_out=cPar.path_out; file_gene=cPar.file_gene; - + // WJA added. file_oxford=cPar.file_oxford; @@ -228,12 +228,12 @@ void CalcPab (const size_t n_cvt, const size_t e_mode, index_aw=GetabIndex (a, p, n_cvt); index_bw=GetabIndex (b, p, n_cvt); index_ww=GetabIndex (p, p, n_cvt); - + ps_ab=gsl_matrix_get (Pab, p-1, index_ab); ps_aw=gsl_matrix_get (Pab, p-1, index_aw); ps_bw=gsl_matrix_get (Pab, p-1, index_bw); ps_ww=gsl_matrix_get (Pab, p-1, index_ww); - + p_ab=ps_ab-ps_aw*ps_bw/ps_ww; gsl_matrix_set (Pab, p, index_ab, p_ab); } @@ -269,7 +269,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, index_aw=GetabIndex (a, p, n_cvt); index_bw=GetabIndex (b, p, n_cvt); index_ww=GetabIndex (p, p, n_cvt); - + ps2_ab=gsl_matrix_get (PPab, p-1, index_ab); ps_aw=gsl_matrix_get (Pab, p-1, index_aw); ps_bw=gsl_matrix_get (Pab, p-1, index_bw); @@ -277,7 +277,7 @@ void CalcPPab (const size_t n_cvt, const size_t e_mode, ps2_aw=gsl_matrix_get (PPab, p-1, index_aw); ps2_bw=gsl_matrix_get (PPab, p-1, index_bw); ps2_ww=gsl_matrix_get (PPab, p-1, index_ww); - + p2_ab=ps2_ab+ps_aw*ps_bw* ps2_ww/(ps_ww*ps_ww); p2_ab-=(ps_aw*ps2_bw+ps_bw*ps2_aw)/ps_ww; @@ -318,7 +318,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, index_aw=GetabIndex (a, p, n_cvt); index_bw=GetabIndex (b, p, n_cvt); index_ww=GetabIndex (p, p, n_cvt); - + ps3_ab=gsl_matrix_get (PPPab, p-1, index_ab); ps_aw=gsl_matrix_get (Pab, p-1, index_aw); ps_bw=gsl_matrix_get (Pab, p-1, index_bw); @@ -337,7 +337,7 @@ void CalcPPPab (const size_t n_cvt, const size_t e_mode, p3_ab+=(ps_aw*ps2_bw*ps2_ww+ps_bw* ps2_aw*ps2_ww+ps_aw*ps_bw*ps3_ww)/ (ps_ww*ps_ww); - + gsl_matrix_set (PPPab, p, index_ab, p3_ab); } } @@ -1479,7 +1479,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, b=ch[0]; // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { + for (size_t j=0; j<4; ++j) { if ((i==(n_bit-1)) && ci_total==(int)ni_total) { break; } @@ -1487,7 +1487,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, ci_total++; continue; } - + if (b[2*j]==0) { if (b[2*j+1]==0) { gsl_vector_set(x, ci_test, 2); @@ -1499,7 +1499,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } else {gsl_vector_set(x, ci_test, -9); n_miss++; } } - + ci_total++; ci_test++; } @@ -1678,7 +1678,7 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, ProgressBar ("Reading SNPs ", t, ns_total-1); } if (indicator_snp[t]==0) {continue;} - + // Read SNP header. id.clear(); rs.clear(); @@ -1752,14 +1752,14 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, gsl_vector_set_zero(x_miss); for (size_t i=0; i<bgen_N; ++i) { if (indicator_idv[i]==0) {continue;} - + bgen_geno_prob_AA= static_cast<double>(unzipped_data[i*3])/32768.0; bgen_geno_prob_AB= static_cast<double>(unzipped_data[i*3+1])/32768.0; bgen_geno_prob_BB= static_cast<double>(unzipped_data[i*3+2])/32768.0; - + // WJA. bgen_geno_prob_non_miss = bgen_geno_prob_AA + bgen_geno_prob_AB+bgen_geno_prob_BB; @@ -1768,13 +1768,13 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, n_miss++; } else { - + bgen_geno_prob_AA/=bgen_geno_prob_non_miss; bgen_geno_prob_AB/=bgen_geno_prob_non_miss; bgen_geno_prob_BB/=bgen_geno_prob_non_miss; - + geno=2.0*bgen_geno_prob_BB+bgen_geno_prob_AB; - + gsl_vector_set(x, c_phen, geno); gsl_vector_set(x_miss, c_phen, 1.0); x_mean+=geno; @@ -1962,7 +1962,7 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, } } else { - + // If derivates change signs. int status; int iter=0, max_iter=100; @@ -2010,11 +2010,11 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, status=gsl_root_test_interval(lambda_l,lambda_h,0,1e-1); } while (status==GSL_CONTINUE && iter<max_iter); - + iter=0; - + gsl_root_fdfsolver_set (s_fdf, &FDF, l); - + do { iter++; status=gsl_root_fdfsolver_iterate (s_fdf); @@ -2034,7 +2034,7 @@ void CalcLambda (const char func_name, FUNC_PARAM ¶ms, } else { logf_l=LogL_f (l, ¶ms); } - + if (i==0) {logf=logf_l; lambda=l;} else if (logf<logf_l) {logf=logf_l; lambda=l;} else {} @@ -2228,7 +2228,7 @@ void LMM::AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, gsl_blas_dgemv (CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector); gsl_vector_view UtW_expand_x= gsl_matrix_column(UtW_expand, UtW->size2+1); - + // Start reading genotypes and analyze. for (size_t t=0; t<indicator_snp.size(); ++t) { !safeGetline(infile, line).eof(); @@ -2400,7 +2400,7 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, b=ch[0]; // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { + for (size_t j=0; j<4; ++j) { if ((i==(n_bit-1)) && ci_total==(int)ni_total) { break; } @@ -2408,7 +2408,7 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, ci_total++; continue; } - + if (b[2*j]==0) { if (b[2*j+1]==0) { gsl_vector_set(x, ci_test, 2); @@ -2420,7 +2420,7 @@ void LMM::AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } else {gsl_vector_set(x, ci_test, -9); n_miss++; } } - + ci_total++; ci_test++; } @@ -1,4 +1,4 @@ -/* +/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou @@ -71,7 +71,7 @@ public: // Indicator for individuals (phenotypes): 0 missing, 1 // available for analysis. - vector<int> indicator_idv; + vector<int> indicator_idv; // Sequence indicator for SNPs: 0 ignored because of (a) maf, // (b) miss, (c) non-poly; 1 available for analysis. diff --git a/src/logistic.h b/src/logistic.h index 7f9e133..b61ab14 100644 --- a/src/logistic.h +++ b/src/logistic.h @@ -1,75 +1,75 @@ -#ifndef LOGISTIC_H_
-#define LOGISTIC_H_
-
-// Mixed interface.
-void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with num. categories.
- gsl_matrix *Xc, // Continuous covariates matrix
- // Nobs x Kc
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
-
-int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_matrix *Xc, // Continuous covariates
- // matrix Nobs x Kc
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Reg. L1 0.0 if not used.
- double lambdaL2); // Reg. L2 0.0 if not used.
-
-double fLogit_mixed(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-// Categorical-only interface.
-void logistic_cat_pred(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K.
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *yhat); // Vector of prob. predicted by
- // the logistic.
-
-int logistic_cat_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix_int *X, // Matrix Nobs x K .
- gsl_vector_int *nlev, // Vector with number categories.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used
- double lambdaL2); // Regularization L2, 0 if not used
-
-double fLogit_cat(gsl_vector *beta,
- gsl_matrix_int *X,
- gsl_vector_int *nlev,
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-// Continuous-only interface.
-void logistic_cont_pred(gsl_vector *beta, // Vector of parameters
- // length = 1 + Sum_k(C_k-1) + Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *yhat);// Vector of prob. predicted
- // by the logistic.
-
-int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
- // length = 1+Sum_k(C_k-1)+Kc.
- gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc.
- gsl_vector *y, // Vector of prob. to predict.
- double lambdaL1, // Regularization L1, 0 if not used.
- double lambdaL2); // Regularization L2, 0 if not used.
-
-double fLogit_cont(gsl_vector *beta,
- gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
- gsl_vector *y,
- double lambdaL1,
- double lambdaL2);
-
-#endif
+#ifndef LOGISTIC_H_ +#define LOGISTIC_H_ + +// Mixed interface. +void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix_int *X, // Matrix Nobs x K. + gsl_vector_int *nlev, // Vector with num. categories. + gsl_matrix *Xc, // Continuous covariates matrix + // Nobs x Kc + gsl_vector *yhat); // Vector of prob. predicted by + // the logistic. + +int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters + // length = 1+Sum_k(C_k-1)+Kc + gsl_matrix_int *X, // Matrix Nobs x K. + gsl_vector_int *nlev, // Vector with number categories. + gsl_matrix *Xc, // Continuous covariates + // matrix Nobs x Kc + gsl_vector *y, // Vector of prob. to predict. + double lambdaL1, // Reg. L1 0.0 if not used. + double lambdaL2); // Reg. L2 0.0 if not used. + +double fLogit_mixed(gsl_vector *beta, + gsl_matrix_int *X, + gsl_vector_int *nlev, + gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc + gsl_vector *y, + double lambdaL1, + double lambdaL2); + +// Categorical-only interface. +void logistic_cat_pred(gsl_vector *beta, // Vector of parameters + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix_int *X, // Matrix Nobs x K. + gsl_vector_int *nlev, // Vector with number categories. + gsl_vector *yhat); // Vector of prob. predicted by + // the logistic. + +int logistic_cat_fit(gsl_vector *beta, // Vector of parameters + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix_int *X, // Matrix Nobs x K . + gsl_vector_int *nlev, // Vector with number categories. + gsl_vector *y, // Vector of prob. to predict. + double lambdaL1, // Regularization L1, 0 if not used + double lambdaL2); // Regularization L2, 0 if not used + +double fLogit_cat(gsl_vector *beta, + gsl_matrix_int *X, + gsl_vector_int *nlev, + gsl_vector *y, + double lambdaL1, + double lambdaL2); + +// Continuous-only interface. +void logistic_cont_pred(gsl_vector *beta, // Vector of parameters + // length = 1 + Sum_k(C_k-1) + Kc. + gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc. + gsl_vector *yhat);// Vector of prob. predicted + // by the logistic. + +int logistic_cont_fit(gsl_vector *beta, // Vector of parameters + // length = 1+Sum_k(C_k-1)+Kc. + gsl_matrix *Xc, // Continuous cov's matrix Nobs x Kc. + gsl_vector *y, // Vector of prob. to predict. + double lambdaL1, // Regularization L1, 0 if not used. + double lambdaL2); // Regularization L2, 0 if not used. + +double fLogit_cont(gsl_vector *beta, + gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc. + gsl_vector *y, + double lambdaL1, + double lambdaL2); + +#endif diff --git a/src/main.cpp b/src/main.cpp index b7ac884..c7f0573 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,12 +25,12 @@ using namespace std; -int main(int argc, char * argv[]) { - GEMMA cGemma; +int main(int argc, char * argv[]) { + GEMMA cGemma; PARAM cPar; if (argc <= 1) { - cGemma.PrintHeader(); + cGemma.PrintHeader(); return EXIT_SUCCESS; } if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'h') { @@ -46,27 +46,27 @@ int main(int argc, char * argv[]) { if (argc==2 && argv[1][0] == '-' && argv[1][1] == 'l') { cGemma.PrintLicense(); return EXIT_SUCCESS; - } - - cGemma.Assign(argc, argv, cPar); + } + + cGemma.Assign(argc, argv, cPar); ifstream check_dir((cPar.path_out).c_str()); if (!check_dir) { mkdir((cPar.path_out).c_str(), S_IRWXU|S_IRGRP|S_IROTH); - } - + } + if (cPar.error==true) {return EXIT_FAILURE;} - + if (cPar.mode_silence) {stringstream ss; cout.rdbuf (ss.rdbuf());} - + cPar.CheckParam(); - + if (cPar.error==true) {return EXIT_FAILURE;} - + cGemma.BatchRun(cPar); - + if (cPar.error==true) {return EXIT_FAILURE;} - + cGemma.WriteLog(argc, argv, cPar); - + return EXIT_SUCCESS; } diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index c09b587..709bdde 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -375,7 +375,7 @@ double CalcHWE (const size_t n_hom1, const size_t n_hom2, const size_t n_ab) { het_probs[i] /= sum; double p_hwe = 0.0; - + // p-value calculation for p_hwe. for (i = 0; i <= rare_copies; i++) { diff --git a/src/param.cpp b/src/param.cpp index a56f5eb..413d517 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -66,7 +66,7 @@ time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) // Read files: obtain ns_total, ng_total, ns_test, ni_test. void PARAM::ReadFiles (void) { string file_str; - + // Read cat file. if (!file_mcat.empty()) { if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;} @@ -216,7 +216,7 @@ void PARAM::ReadFiles (void) { // If both fam file and pheno files are used, use // phenotypes inside the pheno file. if (!file_pheno.empty()) { - + // Phenotype file before genotype file. if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} @@ -247,7 +247,7 @@ void PARAM::ReadFiles (void) { // Read genotype and phenotype file for BIMBAM format. if (!file_geno.empty()) { - + // Annotation file before genotype file. if (!file_anno.empty() ) { if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, @@ -297,11 +297,11 @@ void PARAM::ReadFiles (void) { if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} if (t==0) { - + // If both fam file and pheno files are used, use // phenotypes inside the pheno file. if (!file_pheno.empty()) { - + // Phenotype file before genotype file. if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) { @@ -347,7 +347,7 @@ void PARAM::ReadFiles (void) { // Read genotype and phenotype file for multiple BIMBAM files. if (!file_mgeno.empty()) { - + // Annotation file before genotype file. if (!file_anno.empty() ) { if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, @@ -788,7 +788,7 @@ void PARAM::CheckParam (void) { if (!file_bfile.empty()) {flag++;} if (!file_geno.empty()) {flag++;} if (!file_gene.empty()) {flag++;} - + // WJA added. if (!file_oxford.empty()) {flag++;} @@ -982,7 +982,7 @@ void PARAM::CheckData (void) { // WJA NOTE: I added this condition so that covariates can be added // through sample, probably not exactly what is wanted. - if(file_oxford.empty()) + if(file_oxford.empty()) { if ((file_cvt).empty() || (indicator_cvt).size()==0) { n_cvt=1; @@ -1017,7 +1017,7 @@ void PARAM::CheckData (void) { "the number of individuals. "<<endl; return; } - + if ( (indicator_read).size()!=0 && (indicator_read).size()!=(indicator_idv).size()) { error=true; @@ -1025,7 +1025,7 @@ void PARAM::CheckData (void) { "match the number of individuals. "<<endl; return; } - + // Calculate ni_total and ni_test, and set indicator_idv to 0 // whenever indicator_cvt=0, and calculate np_obs and np_miss. ni_total=(indicator_idv).size(); @@ -1896,7 +1896,7 @@ void PARAM::CheckCvt () { // Post-process phentoypes and covariates. void PARAM::ProcessCvtPhen () { - + // Convert indicator_pheno to indicator_idv. int k=1; indicator_idv.clear(); @@ -1949,7 +1949,7 @@ void PARAM::ProcessCvtPhen () { cout<<"error! number of subsamples is less than number of"<< "analyzed individuals. "<<endl; } else { - + // Set up random environment. gsl_rng_env_setup(); gsl_rng *gsl_r; diff --git a/src/param.h b/src/param.h index 9707790..f58da53 100644 --- a/src/param.h +++ b/src/param.h @@ -141,7 +141,7 @@ public: string file_read; // File containing total number of reads. string file_gene; // Gene expression file. string file_snps; // File containing analyzed SNPs or genes. - + // WJA added. string file_oxford; @@ -212,7 +212,7 @@ public: // Summary statistics. bool error; - + // Number of individuals. size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; @@ -221,7 +221,7 @@ public: // Number of SNPs. size_t ns_total, ns_test, ns_study, ns_ref; - + size_t ng_total, ng_test; // Number of genes. size_t ni_control, ni_case; // Number of controls and number of cases. size_t ni_subsample; // Number of subsampled individuals. diff --git a/src/prdt.cpp b/src/prdt.cpp index db0fa14..b29d150 100644 --- a/src/prdt.cpp +++ b/src/prdt.cpp @@ -1,17 +1,17 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou - + This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - + This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - + You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ @@ -24,7 +24,7 @@ #include <bitset> #include <vector> #include <stdio.h> -#include <stdlib.h> +#include <stdlib.h> #include <cmath> #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" @@ -43,36 +43,36 @@ using namespace std; void PRDT::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; - + file_bfile=cPar.file_bfile; file_geno=cPar.file_geno; file_out=cPar.file_out; path_out=cPar.path_out; - - indicator_pheno=cPar.indicator_pheno; + + indicator_pheno=cPar.indicator_pheno; indicator_cvt=cPar.indicator_cvt; indicator_idv=cPar.indicator_idv; - + snpInfo=cPar.snpInfo; mapRS2est=cPar.mapRS2est; - + time_eigen=0; - + n_ph=cPar.n_ph; np_obs=cPar.np_obs; np_miss=cPar.np_miss; ns_total=cPar.ns_total; - ns_test=0; - + ns_test=0; + return; } void PRDT::CopyToParam (PARAM &cPar) { cPar.ns_test=ns_test; cPar.time_eigen=time_eigen; - + return; -} +} void PRDT::WriteFiles (gsl_vector *y_prdt) { string file_str; @@ -80,13 +80,13 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) { file_str+="."; file_str+="prdt"; file_str+=".txt"; - + ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) { cout<<"error writing file: "<<file_str.c_str()<<endl; return; } - + size_t ci_test=0; for (size_t i=0; i<indicator_idv.size(); i++) { if (indicator_idv[i]==1) { @@ -96,7 +96,7 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) { ci_test++; } } - + outfile.close(); outfile.clear(); return; @@ -106,13 +106,13 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) { string file_str; file_str=path_out+"/"+file_out; file_str+=".prdt.txt"; - + ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) { cout<<"error writing file: "<<file_str.c_str()<<endl; return; } - + size_t ci_test=0; for (size_t i=0; i<indicator_cvt.size(); i++) { if (indicator_cvt[i]==0) { @@ -126,7 +126,7 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) { ci_test++; } } - + outfile.close(); outfile.clear(); return; @@ -134,21 +134,21 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) { void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { size_t ni_test=u_hat->size, ni_total=G->size1; - + gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test); gsl_matrix *Gfo=gsl_matrix_alloc (ni_total-ni_test, ni_test); - gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test); + gsl_matrix *U=gsl_matrix_alloc (ni_test, ni_test); gsl_vector *eval=gsl_vector_alloc (ni_test); gsl_vector *Utu=gsl_vector_alloc (ni_test); gsl_vector *w=gsl_vector_alloc (ni_total); gsl_permutation *pmt=gsl_permutation_alloc (ni_test); - + //center matrix G based on indicator_idv for (size_t i=0; i<ni_total; i++) { gsl_vector_set(w, i, indicator_idv[i]); } CenterMatrix(G, w); - + //obtain Koo and Kfo size_t o_i=0, o_j=0; double d; @@ -166,7 +166,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { } if (indicator_idv[i]==1) {o_i++;} } - + //matrix operations to get u_prdt cout<<"Start Eigen-Decomposition..."<<endl; clock_t time_start=clock(); @@ -177,8 +177,8 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { } } - time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - + time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu); for (size_t i=0; i<eval->size; i++) { d=gsl_vector_get(eval, i); @@ -189,7 +189,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { } gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval); gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt); - + // Free matrices. gsl_matrix_free(Goo); gsl_matrix_free(Gfo); @@ -199,7 +199,7 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { gsl_vector_free(w); gsl_permutation_free(pmt); - return; + return; } void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { @@ -208,17 +208,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { cout<<"error reading genotype file:"<<file_geno<<endl; return; } - + string line; char *ch_ptr; string rs; - + size_t n_miss, n_train_nomiss, c_phen; double geno, x_mean, x_train_mean, effect_size; - + gsl_vector *x=gsl_vector_alloc (y_prdt->size); gsl_vector *x_miss=gsl_vector_alloc (y_prdt->size); - + ns_test=0; // Start reading genotypes and analyze. @@ -227,24 +227,24 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { if (t%d_pace==0 || t==(ns_total-1)) { ProgressBar ("Reading SNPs ", t, ns_total-1); } - + ch_ptr=strtok ((char *)line.c_str(), " , \t"); rs=ch_ptr; ch_ptr=strtok (NULL, " , \t"); - ch_ptr=strtok (NULL, " , \t"); - + ch_ptr=strtok (NULL, " , \t"); + if (mapRS2est.count(rs)==0) { continue; } else { effect_size=mapRS2est[rs]; } - + x_mean=0.0; c_phen=0; n_miss=0; x_train_mean=0; n_train_nomiss=0; - + gsl_vector_set_zero(x_miss); for (size_t i=0; i<indicator_idv.size(); ++i) { @@ -260,10 +260,10 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { gsl_vector_set(x_miss, c_phen, 0.0); n_miss++; } else { - geno=atof(ch_ptr); - - gsl_vector_set(x, c_phen, geno); - gsl_vector_set(x_miss, c_phen, 1.0); + geno=atof(ch_ptr); + + gsl_vector_set(x, c_phen, geno); + gsl_vector_set(x_miss, c_phen, 1.0); x_mean+=geno; } c_phen++; @@ -274,12 +274,12 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { cout << "snp " << rs << " has missing genotype for all " << "individuals and will be ignored." << endl; continue;} - + x_mean/=(double)(x->size-n_miss); x_train_mean/=(double)(n_train_nomiss); - - + + for (size_t i=0; i<x->size; ++i) { geno=gsl_vector_get(x, i); if (gsl_vector_get (x_miss, i)==0) { @@ -291,17 +291,17 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { gsl_vector_scale (x, effect_size); gsl_vector_add (y_prdt, x); - + ns_test++; - } + } cout<<endl; - + gsl_vector_free (x); gsl_vector_free (x_miss); - + infile.close(); infile.clear(); - + return; } @@ -312,35 +312,35 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) { cout<<"error reading bed file:"<<file_bed<<endl; return; } - + char ch[1]; - bitset<8> b; + bitset<8> b; string rs; - + size_t n_bit, n_miss, ci_total, ci_test, n_train_nomiss; double geno, x_mean, x_train_mean, effect_size; - + gsl_vector *x=gsl_vector_alloc (y_prdt->size); - + // Calculate n_bit and c, the number of bit for each SNP. if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;} else {n_bit=indicator_idv.size()/4+1; } - + // Print the first 3 magic numbers. for (size_t i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; - } - + } + ns_test=0; - + for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { if (t%d_pace==0 || t==snpInfo.size()-1) { ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); } - + rs=snpInfo[t].rs_number; - + if (mapRS2est.count(rs)==0) { continue; } else { @@ -349,7 +349,7 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) { // n_bit, and 3 is the number of magic numbers. infile.seekg(t*n_bit+3); - + // Read genotypes. x_mean=0.0; n_miss=0; @@ -359,7 +359,7 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) { b=ch[0]; // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { + for (size_t j=0; j<4; ++j) { if ((i==(n_bit-1)) && ci_total==indicator_idv.size()) { break; @@ -404,19 +404,19 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) { ci_test++; } ci_total++; - + } } - + if (x->size==n_miss) { cout << "snp " << rs << " has missing genotype for all " << "individuals and will be ignored."<<endl; continue; } - + x_mean/=(double)(x->size-n_miss); x_train_mean/=(double)(n_train_nomiss); - + for (size_t i=0; i<x->size; ++i) { geno=gsl_vector_get(x, i); if (geno==-9) { @@ -425,47 +425,47 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) { gsl_vector_set(x, i, geno-x_train_mean); } } - + gsl_vector_scale (x, effect_size); gsl_vector_add (y_prdt, x); - + ns_test++; - } + } cout<<endl; - + gsl_vector_free (x); - + infile.close(); - infile.clear(); - + infile.clear(); + return; } // Predict missing phenotypes using ridge regression. // Y_hat contains fixed effects void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, - gsl_matrix *Y_full) { + gsl_matrix *Y_full) { gsl_vector *y_obs=gsl_vector_alloc (np_obs); gsl_vector *y_miss=gsl_vector_alloc (np_miss); gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs); gsl_matrix *H_mo=gsl_matrix_alloc (np_miss, np_obs); gsl_vector *Hiy=gsl_vector_alloc (np_obs); - + size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0; - + // Obtain H_oo, H_mo. - c_obs1=0; c_miss1=0; + c_obs1=0; c_miss1=0; for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) { if (indicator_cvt[i1]==0) {continue;} for (vector<int>::size_type j1=0; j1<n_ph; ++j1) { - + c_obs2=0; c_miss2=0; for (vector<int>::size_type i2=0; i2<indicator_pheno.size(); ++i2) { if (indicator_cvt[i2]==0) {continue;} for (vector<int>::size_type j2=0; j2<n_ph; j2++) { - + if (indicator_pheno[i2][j2]==1) { if (indicator_pheno[i1][j1]==1) { gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); @@ -476,30 +476,30 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, } else { c_miss2++; } - } + } } - + if (indicator_pheno[i1][j1]==1) { c_obs1++; } else { c_miss1++; } } - - } - + + } + // Do LU decomposition of H_oo. int sig; gsl_permutation * pmt=gsl_permutation_alloc (np_obs); LUDecomp (H_oo, pmt, &sig); - + // Obtain y_obs=y_full-y_hat. // Add the fixed effects part to y_miss: y_miss=y_hat. c_obs1=0; c_miss1=0; for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) { if (indicator_cvt[i]==0) {continue;} - + for (vector<int>::size_type j=0; j<n_ph; ++j) { if (indicator_pheno[i][j]==1) { gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j)-gsl_matrix_get (Y_hat, i, j) ); @@ -509,18 +509,18 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, c_miss1++; } } - } - + } + LUSolve (H_oo, pmt, y_obs, Hiy); - + gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); - + // Put back predicted y_miss to Y_full. c_miss1=0; for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) { if (indicator_cvt[i]==0) {continue;} - + for (vector<int>::size_type j=0; j<n_ph; ++j) { if (indicator_pheno[i][j]==0) { gsl_matrix_set (Y_full, i, j, gsl_vector_get (y_miss, c_miss1) ); @@ -528,14 +528,14 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, } } } - + // Free matrices. gsl_vector_free(y_obs); gsl_vector_free(y_miss); gsl_matrix_free(H_oo); gsl_matrix_free(H_mo); gsl_vector_free(Hiy); - + return; } @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __PRDT_H__ +#ifndef __PRDT_H__ #define __PRDT_H__ #include <vector> @@ -29,30 +29,30 @@ using namespace std; class PRDT { - + public: // IO-related parameters. size_t a_mode; size_t d_pace; - + string file_bfile; string file_geno; string file_out; string path_out; - + vector<vector<int> > indicator_pheno; vector<int> indicator_cvt; vector<int> indicator_idv; vector<SNPINFO> snpInfo; map<string, double> mapRS2est; - + size_t n_ph; size_t np_obs, np_miss; size_t ns_total; size_t ns_test; - + double time_eigen; - + // Main functions. void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); diff --git a/src/varcov.cpp b/src/varcov.cpp index 48a4fc5..46b5bf8 100644 --- a/src/varcov.cpp +++ b/src/varcov.cpp @@ -28,7 +28,7 @@ #include <cstring> #include <cmath> #include <stdio.h> -#include <stdlib.h> +#include <stdlib.h> #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" @@ -47,22 +47,22 @@ using namespace std; void VARCOV::CopyFromParam (PARAM &cPar) { d_pace=cPar.d_pace; - + file_bfile=cPar.file_bfile; file_geno=cPar.file_geno; file_out=cPar.file_out; path_out=cPar.path_out; - + time_opt=0.0; window_cm=cPar.window_cm; window_bp=cPar.window_bp; window_ns=cPar.window_ns; - + indicator_idv=cPar.indicator_idv; indicator_snp=cPar.indicator_snp; snpInfo=cPar.snpInfo; - + return; } @@ -82,7 +82,7 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, if (flag==0) { outfile.open (file_cov.c_str(), ofstream::out); if (!outfile) {cout<<"error writing file: "<<file_cov<<endl; return;} - + outfile<<"chr"<<"\t"<<"rs"<<"\t"<<"ps"<<"\t"<<"n_mis" <<"\t"<<"n_obs"<<"\t"<<"allele1"<<"\t"<<"allele0" <<"\t"<<"af"<<"\t"<<"window_size" @@ -111,11 +111,11 @@ void VARCOV::WriteCov (const int flag, const vector<SNPINFO> &snpInfo_sub, } } } - + outfile<<endl; } } - + outfile.close(); outfile.clear(); return; @@ -147,7 +147,7 @@ void VARCOV::CalcNB (vector<SNPINFO> &snpInfo_sort) { size_t t2=0, n_nb=0; for (size_t t=0; t<indicator_snp.size(); ++t) { if (indicator_snp[t]==0) {continue;} - + if (snpInfo_sort[t].chr=="-9" || (snpInfo_sort[t].cM==-9 && window_cm!=0) || (snpInfo_sort[t].base_position==-9 && window_bp!=0) ) { @@ -203,7 +203,7 @@ void Calc_Cor(vector<vector<double> > &X_mat, vector<double> &cov_vec) { } return; -} +} // Read the genotype file again, calculate r2 between pairs of SNPs // within a window, output the file every 10K SNPs the output results @@ -229,7 +229,7 @@ void VARCOV::AnalyzeBimbam () { for (size_t i=0; i<indicator_idv.size(); i++) { ni_test+=indicator_idv[i]; } - + gsl_vector *geno=gsl_vector_alloc (ni_test); double geno_mean; @@ -253,29 +253,29 @@ void VARCOV::AnalyzeBimbam () { if (X_mat.size()==0) { n_nb=snpInfo[t].n_nb+1; } else { - n_nb=snpInfo[t].n_nb-n_nb+1; + n_nb=snpInfo[t].n_nb-n_nb+1; } for (int i=0; i<n_nb; i++) { - if (X_mat.size()==0) {t2=t;} + if (X_mat.size()==0) {t2=t;} // Read a line of the snp is filtered out. inc=0; while (t2<indicator_snp.size() && indicator_snp[t2]==0) { - t2++; inc++; + t2++; inc++; } Bimbam_ReadOneSNP (inc, indicator_idv, infile, geno, geno_mean); gsl_vector_add_constant (geno, -1.0*geno_mean); - + for (size_t j=0; j<geno->size; j++) { x_vec[j]=gsl_vector_get(geno, j); } X_mat.push_back(x_vec); t2++; - } - + } + n_nb=snpInfo[t].n_nb; Calc_Cor(X_mat, cov_vec); @@ -301,8 +301,8 @@ void VARCOV::AnalyzeBimbam () { gsl_vector_free(geno); infile.close(); - infile.clear(); - + infile.clear(); + return; } @@ -314,7 +314,7 @@ void VARCOV::AnalyzePlink () { // Calculate the number of right-hand-side neighbours for each SNP. vector<SNPINFO> snpInfo_sub; CalcNB(snpInfo); - + size_t ni_test=0; for (size_t i=0; i<indicator_idv.size(); i++) { ni_test+=indicator_idv[i]; @@ -343,30 +343,30 @@ void VARCOV::AnalyzePlink () { if (X_mat.size()==0) { n_nb=snpInfo[t].n_nb+1; } else { - n_nb=snpInfo[t].n_nb-n_nb+1; + n_nb=snpInfo[t].n_nb-n_nb+1; } for (int i=0; i<n_nb; i++) { - if (X_mat.size()==0) {t2=t;} + if (X_mat.size()==0) {t2=t;} // Read a line if the SNP is filtered out. inc=0; while (t2<indicator_snp.size() && indicator_snp[t2]==0) { t2++; - inc++; + inc++; } Plink_ReadOneSNP (t2, indicator_idv, infile, geno, geno_mean); gsl_vector_add_constant (geno, -1.0*geno_mean); - + for (size_t j=0; j<geno->size; j++) { x_vec[j]=gsl_vector_get(geno, j); } X_mat.push_back(x_vec); t2++; - } - + } + n_nb=snpInfo[t].n_nb; Calc_Cor(X_mat, cov_vec); @@ -392,7 +392,7 @@ void VARCOV::AnalyzePlink () { gsl_vector_free(geno); infile.close(); - infile.clear(); - + infile.clear(); + return; } diff --git a/src/varcov.h b/src/varcov.h index 3b45123..4a1eb3a 100644 --- a/src/varcov.h +++ b/src/varcov.h @@ -16,7 +16,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#ifndef __VARCOV_H__ +#ifndef __VARCOV_H__ #define __VARCOV_H__ #include "gsl/gsl_vector.h" @@ -47,7 +47,7 @@ public: double window_cm; size_t window_bp; size_t window_ns; - + // Main functions. void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); @@ -175,7 +175,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { gsl_matrix *WtHiWiWtHi=gsl_matrix_alloc(n_cvt, n1); double sigma2; - + // Calculate H = \sum_i^{k+1} \sigma_i^2 K_i. gsl_matrix_set_zero (p->P); for (size_t i=0; i<n_vc+1; i++) { @@ -196,7 +196,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { gsl_matrix_scale(K_temp, sigma2); gsl_matrix_add (p->P, K_temp); } - + // Calculate H^{-1}. eigenlib_invert(p->P); @@ -211,7 +211,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { // Calculate Py, KPy, PKPy. gsl_blas_dgemv(CblasNoTrans, 1.0, p->P, p->y, 0.0, p->Py); - + double d; for (size_t i=0; i<n_vc+1; i++) { gsl_vector_view KPy=gsl_matrix_column (p->KPy_mat, i); @@ -221,7 +221,7 @@ void UpdateParam (const gsl_vector *log_sigma2, VC_PARAM *p) { gsl_vector_memcpy (&KPy.vector, p->Py); } else { gsl_matrix_const_view K_sub=gsl_matrix_const_submatrix (p->K, 0, n1*i, n1, n1); - + // Seems to be important to use gsl dgemv here instead of // eigenlib_dgemv; otherwise. gsl_blas_dgemv(CblasNoTrans, 1.0, &K_sub.matrix, p->Py, 0.0, @@ -643,7 +643,7 @@ void ReadFile_cor (const string &file_cor, const set<string> &setSnps, } while (!safeGetline(infile, line).eof()) { - + //do not read cor values this time; upto col_n-1. ch_ptr=strtok ((char *)line.c_str(), " , \t"); @@ -942,7 +942,7 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, ReadHeader_vc (line, header); while (!safeGetline(infile, line).eof()) { - + // Do not read cor values this time; upto col_n-1. d_pos1=0; d_cm1=0; ch_ptr=strtok ((char *)line.c_str(), " , \t"); @@ -1115,7 +1115,7 @@ void ReadFile_cor (const string &file_cor, const vector<string> &vec_rs, mat_S[i][j]*=crt_factor; mat_S[j][i]*=crt_factor; } cout<<crt_factor<<endl; - + // Correct qvar. if (i==j) { vec_qvar[i]*=crt_factor; @@ -1198,7 +1198,7 @@ void CalcVCss(const gsl_matrix *Vq, const gsl_matrix *S_mat, // Get qvar_mat. gsl_matrix_memcpy (qvar_mat, Vq); gsl_matrix_scale (qvar_mat, 1.0/(df*df)); - + // Calculate variance for these estimates. for (size_t i=0; i<n_vc; i++) { for (size_t j=i; j<n_vc; j++) { @@ -1873,7 +1873,7 @@ void VC::CalcVCacl (const gsl_matrix *K, const gsl_matrix *W, size_t it=0; double s=1; while (abs(s)>1e-3 && it<100) { - + // Update tau_inv. gsl_blas_ddot (q_vec, pve, &d); if (it>0) {s=y2_sum/(double)n1-d/((double)n1*((double)n1-1))-tau_inv;} @@ -2143,7 +2143,7 @@ bool PlinkXwz (const string &file_bed, const int display_pace, gsl_vector_mul(wz, w); int n_bit; - + // Calculate n_bit and c, the number of bit for each snp. if (ni_total%4==0) {n_bit=ni_total/4;} else {n_bit=ni_total/4+1; } @@ -2170,7 +2170,7 @@ bool PlinkXwz (const string &file_bed, const int display_pace, b=ch[0]; // Minor allele homozygous: 2.0; major: 0.0. - for (size_t j=0; j<4; ++j) { + for (size_t j=0; j<4; ++j) { if ((i==(n_bit-1)) && ci_total==ni_total) { break; } @@ -2393,7 +2393,7 @@ bool PlinkXtXwz (const string &file_bed, const int display_pace, if (indicator_snp[t]==0) {continue;} // n_bit, and 3 is the number of magic numbers. - infile.seekg(t*n_bit+3); + infile.seekg(t*n_bit+3); // Read genotypes. geno_mean=0.0; n_miss=0; ci_total=0; geno_var=0.0; ci_test=0; @@ -2402,7 +2402,7 @@ bool PlinkXtXwz (const string &file_bed, const int display_pace, b=ch[0]; // Minor allele homozygous: 2.0; major: 0.0; - for (size_t j=0; j<4; ++j) { + for (size_t j=0; j<4; ++j) { if ((i==(n_bit-1)) && ci_total==ni_total) { break; } |