diff options
author | Peter Carbonetto | 2017-06-07 23:23:35 -0500 |
---|---|---|
committer | Peter Carbonetto | 2017-06-07 23:23:35 -0500 |
commit | 93a7a2adb03f61e80badf6a5004fa4850dbb7d48 (patch) | |
tree | 72eb62acf1bc21000cd969e62658261590eab36e /src | |
parent | 35e4ee4767c35c2436fea81788742641172ada37 (diff) | |
download | pangemma-93a7a2adb03f61e80badf6a5004fa4850dbb7d48.tar.gz |
Removed FORCE_FLOAT from a few more files.
Diffstat (limited to 'src')
-rw-r--r-- | src/bslmm.cpp | 839 | ||||
-rw-r--r-- | src/bslmm.h | 158 | ||||
-rw-r--r-- | src/bslmmdap.cpp | 260 | ||||
-rw-r--r-- | src/bslmmdap.h | 62 | ||||
-rw-r--r-- | src/gemma.h | 17 | ||||
-rw-r--r-- | src/lmm.h | 113 | ||||
-rw-r--r-- | src/logistic.cpp | 46 | ||||
-rw-r--r-- | src/mvlmm.h | 81 |
8 files changed, 874 insertions, 702 deletions
diff --git a/src/bslmm.cpp b/src/bslmm.cpp index d295fd8..92762e2 100644 --- a/src/bslmm.cpp +++ b/src/bslmm.cpp @@ -14,7 +14,7 @@ You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. - */ +*/ #include <iostream> #include <fstream> @@ -47,8 +47,7 @@ using namespace std; -void BSLMM::CopyFromParam (PARAM &cPar) -{ +void BSLMM::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; @@ -101,9 +100,7 @@ void BSLMM::CopyFromParam (PARAM &cPar) return; } - -void BSLMM::CopyToParam (PARAM &cPar) -{ +void BSLMM::CopyToParam (PARAM &cPar) { cPar.time_UtZ=time_UtZ; cPar.time_Omega=time_Omega; cPar.time_Proposal=time_Proposal; @@ -115,16 +112,16 @@ void BSLMM::CopyToParam (PARAM &cPar) return; } - - -void BSLMM::WriteBV (const gsl_vector *bv) -{ +void BSLMM::WriteBV (const gsl_vector *bv) { string file_str; file_str=path_out+"/"+file_out; file_str+=".bv.txt"; ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;} + if (!outfile) { + cout<<"error writing file: "<<file_str.c_str()<<endl; + return; + } size_t t=0; for (size_t i=0; i<ni_total; ++i) { @@ -132,7 +129,8 @@ void BSLMM::WriteBV (const gsl_vector *bv) outfile<<"NA"<<endl; } else { - outfile<<scientific<<setprecision(6)<<gsl_vector_get(bv, t)<<endl; + outfile<<scientific<<setprecision(6)<< + gsl_vector_get(bv, t)<<endl; t++; } } @@ -142,17 +140,16 @@ void BSLMM::WriteBV (const gsl_vector *bv) return; } - - - -void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, const gsl_vector *alpha, const size_t w) -{ +void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, + const gsl_vector *alpha, const size_t w) { string file_str; file_str=path_out+"/"+file_out; file_str+=".param.txt"; ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;} + if (!outfile) { + 
cout<<"error writing file: "<<file_str.c_str()<<endl; + return;} outfile<<"chr"<<"\t"<<"rs"<<"\t" <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t" @@ -163,11 +160,13 @@ void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, const gsl_vector if (indicator_snp[i]==0) {continue;} outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t" - <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; + <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; - outfile<<scientific<<setprecision(6)<<gsl_vector_get(alpha, t)<<"\t"; + outfile<<scientific<<setprecision(6)<< + gsl_vector_get(alpha, t)<<"\t"; if (beta_g[t].second!=0) { - outfile<<beta_g[t].first/beta_g[t].second<<"\t"<<beta_g[t].second/(double)w<<endl; + outfile<<beta_g[t].first/beta_g[t].second<< + "\t"<<beta_g[t].second/(double)w<<endl; } else { outfile<<0.0<<"\t"<<0.0<<endl; @@ -180,15 +179,16 @@ void BSLMM::WriteParam (vector<pair<double, double> > &beta_g, const gsl_vector return; } - -void BSLMM::WriteParam (const gsl_vector *alpha) -{ +void BSLMM::WriteParam (const gsl_vector *alpha) { string file_str; file_str=path_out+"/"+file_out; file_str+=".param.txt"; ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;} + if (!outfile) { + cout<<"error writing file: "<<file_str.c_str()<<endl; + return; + } outfile<<"chr"<<"\t"<<"rs"<<"\t" <<"ps"<<"\t"<<"n_miss"<<"\t"<<"alpha"<<"\t" @@ -198,9 +198,10 @@ void BSLMM::WriteParam (const gsl_vector *alpha) for (size_t i=0; i<ns_total; ++i) { if (indicator_snp[i]==0) {continue;} - outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t" - <<snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; - outfile<<scientific<<setprecision(6)<<gsl_vector_get(alpha, t)<<"\t"; + outfile<<snpInfo[i].chr<<"\t"<<snpInfo[i].rs_number<<"\t"<< + snpInfo[i].base_position<<"\t"<<snpInfo[i].n_miss<<"\t"; + outfile<<scientific<<setprecision(6)<< + gsl_vector_get(alpha, t)<<"\t"; outfile<<0.0<<"\t"<<0.0<<endl; 
t++; } @@ -210,9 +211,8 @@ void BSLMM::WriteParam (const gsl_vector *alpha) return; } - -void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl_matrix *Result_gamma, const size_t w_col) -{ +void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, + const gsl_matrix *Result_gamma, const size_t w_col) { string file_gamma, file_hyp; file_gamma=path_out+"/"+file_out; file_gamma+=".gamma.txt"; @@ -224,8 +224,14 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl if (flag==0) { outfile_gamma.open (file_gamma.c_str(), ofstream::out); outfile_hyp.open (file_hyp.c_str(), ofstream::out); - if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; return;} - if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;} + if (!outfile_gamma) { + cout<<"error writing file: "<<file_gamma<<endl; + return; + } + if (!outfile_hyp) { + cout<<"error writing file: "<<file_hyp<<endl; + return; + } outfile_hyp<<"h \t pve \t rho \t pge \t pi \t n_gamma"<<endl; @@ -237,8 +243,14 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl else { outfile_gamma.open (file_gamma.c_str(), ofstream::app); outfile_hyp.open (file_hyp.c_str(), ofstream::app); - if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; return;} - if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;} + if (!outfile_gamma) { + cout<<"error writing file: "<<file_gamma<<endl; + return; + } + if (!outfile_hyp) { + cout<<"error writing file: "<<file_hyp<<endl; + return; + } size_t w; if (w_col==0) {w=w_pace;} @@ -247,16 +259,19 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl for (size_t i=0; i<w; ++i) { outfile_hyp<<scientific; for (size_t j=0; j<4; ++j) { - outfile_hyp<<setprecision(6)<<gsl_matrix_get (Result_hyp, i, j)<<"\t"; + outfile_hyp<<setprecision(6)<< + gsl_matrix_get (Result_hyp, i, j)<<"\t"; } - 
outfile_hyp<<setprecision(6)<<exp(gsl_matrix_get (Result_hyp, i, 4))<<"\t"; - outfile_hyp<<(int)gsl_matrix_get (Result_hyp, i, 5)<<"\t"; + outfile_hyp<<setprecision(6)<< + exp(gsl_matrix_get (Result_hyp, i, 4))<<"\t"; + outfile_hyp<<(int)gsl_matrix_get(Result_hyp,i,5)<<"\t"; outfile_hyp<<endl; } for (size_t i=0; i<w; ++i) { for (size_t j=0; j<s_max; ++j) { - outfile_gamma<<(int)gsl_matrix_get (Result_gamma, i, j)<<"\t"; + outfile_gamma<< + (int)gsl_matrix_get(Result_gamma,i,j)<<"\t"; } outfile_gamma<<endl; } @@ -270,13 +285,11 @@ void BSLMM::WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl return; } - - -void BSLMM::CalcPgamma (double *p_gamma) -{ +void BSLMM::CalcPgamma (double *p_gamma) { double p, s=0.0; for (size_t i=0; i<ns_test; ++i) { - p=0.7*gsl_ran_geometric_pdf (i+1, 1.0/geo_mean)+0.3/(double)ns_test; + p=0.7*gsl_ran_geometric_pdf (i+1, 1.0/geo_mean)+0.3/ + (double)ns_test; p_gamma[i]=p; s+=p; } @@ -287,10 +300,8 @@ void BSLMM::CalcPgamma (double *p_gamma) return; } - - -void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank) -{ +void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, + vector<size_t> &rank) { size_t pos; for (size_t i=0; i<rank.size(); ++i) { pos=mapRank2pos[rank[i]]; @@ -302,10 +313,8 @@ void BSLMM::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> & return; } - - -double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const double sigma_a2) -{ +double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const double sigma_a2) { double pve, var_y; gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); @@ -333,9 +342,9 @@ double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, cons return pve; } - -void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, vector<size_t> &rank, class HYPBSLMM &cHyp, vector<pair<size_t, double> > &pos_loglr) -{ +void BSLMM::InitialMCMC (const 
gsl_matrix *UtX, const gsl_vector *Uty, + vector<size_t> &rank, class HYPBSLMM &cHyp, + vector<pair<size_t, double> > &pos_loglr) { double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1); cHyp.n_gamma=0; @@ -362,7 +371,8 @@ void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, vector<si SetXgamma (UtXgamma, UtX, rank); double sigma_a2; if (trace_G!=0) { - sigma_a2=cHyp.h*1.0/(trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); + sigma_a2=cHyp.h*1.0/ + (trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); } else { sigma_a2=cHyp.h*1.0/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); } @@ -379,18 +389,6 @@ void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, vector<si if (cHyp.logp<logp_min) {cHyp.logp=logp_min;} if (cHyp.logp>logp_max) {cHyp.logp=logp_max;} - -// if (fix_sigma>=0) { -// fix_sigma=cHyp.h; -// rho_max=1-cHyp.h; -// cHyp.rho=rho_max/2.0; -// } - - //Initial for grid sampling: -// cHyp.h=0.225; -// cHyp.rho=1.0; -// cHyp.logp=-4.835429; - cout<<"initial value of h = "<<cHyp.h<<endl; cout<<"initial value of rho = "<<cHyp.rho<<endl; cout<<"initial value of pi = "<<exp(cHyp.logp)<<endl; @@ -399,10 +397,9 @@ void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, vector<si return; } - - -double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, gsl_vector *Utu, gsl_vector *alpha_prime, class HYPBSLMM &cHyp) -{ +double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *Utu, gsl_vector *alpha_prime, + class HYPBSLMM &cHyp) { double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h)); gsl_vector *Utu_rand=gsl_vector_alloc (Uty->size); @@ -420,25 +417,28 @@ double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, gs uy=gsl_vector_get (Uty, i); Hi_yy+=d*uy*uy; - gsl_vector_set (Utu_rand, i, gsl_ran_gaussian(gsl_r, 1)*sqrt(ds)); + gsl_vector_set (Utu_rand, i, + gsl_ran_gaussian(gsl_r, 1)*sqrt(ds)); } - //sample tau + // Sample tau. 
double tau=1.0; - if (a_mode==11) {tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy); } + if (a_mode==11) { + tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/Hi_yy); + } - //sample alpha + // Sample alpha. gsl_vector_memcpy (alpha_prime, Uty); gsl_vector_mul (alpha_prime, weight_Hi); gsl_vector_scale (alpha_prime, sigma_b2); - //sample u + // Sample u. gsl_vector_memcpy (Utu, alpha_prime); gsl_vector_mul (Utu, K_eval); if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} gsl_vector_add (Utu, Utu_rand); - //for quantitative traits, calculate pve and ppe + // For quantitative traits, calculate pve and ppe. if (a_mode==11) { gsl_blas_ddot (Utu, Utu, &d); cHyp.pve=d/(double)ni_test; @@ -446,12 +446,13 @@ double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, gs cHyp.pge=0.0; } - //calculate likelihood + // Calculate likelihood. logpost=-0.5*logdet_H; if (a_mode==11) {logpost-=0.5*(double)ni_test*log(Hi_yy);} else {logpost-=0.5*Hi_yy;} - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); + logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ + ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); gsl_vector_free (Utu_rand); gsl_vector_free (weight_Hi); @@ -459,18 +460,22 @@ double BSLMM::CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, gs return logpost; } - -double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, gsl_vector *UtXb, gsl_vector *Utu, gsl_vector *alpha_prime, gsl_vector *beta, class HYPBSLMM &cHyp) -{ +double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *UtXb, gsl_vector *Utu, + gsl_vector *alpha_prime, gsl_vector *beta, + class HYPBSLMM &cHyp) { clock_t time_start; - double sigma_a2=cHyp.h*cHyp.rho/(trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); + double sigma_a2=cHyp.h*cHyp.rho/ + 
(trace_G*(1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); double sigma_b2=cHyp.h*(1.0-cHyp.rho)/(trace_G*(1-cHyp.h)); double logpost=0.0; double d, ds, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0; - gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, UtXgamma->size2); + gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, + UtXgamma->size2); gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2); @@ -481,7 +486,8 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, logdet_H=0.0; P_yy=0.0; for (size_t i=0; i<ni_test; ++i) { - gsl_vector_view UtXgamma_row=gsl_matrix_row (UtXgamma_eval, i); + gsl_vector_view UtXgamma_row= + gsl_matrix_row (UtXgamma_eval, i); d=gsl_vector_get (K_eval, i)*sigma_b2; ds=d/(d+1.0); d=1.0/(d+1.0); @@ -492,10 +498,10 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, P_yy+=d*uy*uy; gsl_vector_scale (&UtXgamma_row.vector, d); - gsl_vector_set (Utu_rand, i, gsl_ran_gaussian(gsl_r, 1)*sqrt(ds)); + gsl_vector_set(Utu_rand,i,gsl_ran_gaussian(gsl_r,1)*sqrt(ds)); } - //calculate Omega + // Calculate Omega. gsl_matrix_set_identity (Omega); time_start=clock(); @@ -504,8 +510,8 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //calculate beta_hat - gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); + // Calculate beta_hat. + gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); logdet_O=CholeskySolve(Omega, XtHiy, beta_hat); @@ -514,11 +520,13 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, gsl_blas_ddot (XtHiy, beta_hat, &d); P_yy-=d; - //sample tau + // Sample tau. 
double tau=1.0; - if (a_mode==11) {tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); } + if (a_mode==11) { + tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); + } - //sample beta + // Sample beta. for (size_t i=0; i<beta->size; i++) { d=gsl_ran_gaussian(gsl_r, 1); @@ -526,27 +534,25 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, } gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); - - //it compuates inv(L^T(Omega)) %*% beta; + // This computes inv(L^T(Omega)) %*% beta. gsl_vector_scale(beta, sqrt(sigma_a2/tau)); gsl_vector_add(beta, beta_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); - //sample alpha + // Sample alpha. gsl_vector_memcpy (alpha_prime, Uty); gsl_vector_sub (alpha_prime, UtXb); gsl_vector_mul (alpha_prime, weight_Hi); gsl_vector_scale (alpha_prime, sigma_b2); - //sample u + // Sample u. gsl_vector_memcpy (Utu, alpha_prime); gsl_vector_mul (Utu, K_eval); if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} gsl_vector_add (Utu, Utu_rand); - - //for quantitative traits, calculate pve and pge + // For quantitative traits, calculate pve and pge. 
if (a_mode==11) { gsl_blas_ddot (UtXb, UtXb, &d); cHyp.pge=d/(double)ni_test; @@ -558,7 +564,6 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, else {cHyp.pge/=cHyp.pve;} cHyp.pve/=cHyp.pve+1.0/tau; } - gsl_matrix_free (UtXgamma_eval); gsl_matrix_free (Omega); @@ -570,17 +575,15 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, logpost=-0.5*logdet_H-0.5*logdet_O; if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} else {logpost-=0.5*P_yy;} -// else {logpost+=-0.5*P_yy*tau+0.5*(double)ni_test*log(tau);} - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); + logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ + ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); return logpost; } - - -//calculate pve and pge, and calculate z_hat for case-control data -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp) -{ +// Calculate pve and pge, and calculate z_hat for case-control data. +void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, + gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; gsl_blas_ddot (Utu, Utu, &d); @@ -594,10 +597,10 @@ void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, gsl_vector return; } - -//calculate pve and pge, and calculate z_hat for case-control data -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp) -{ +// Calculate pve and pge, and calculate z_hat for case-control data. 
+void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, + class HYPBSLMM &cHyp) { double d; gsl_vector *UtXbU=gsl_vector_alloc (Utu->size); @@ -620,18 +623,17 @@ void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, const gsl return; } - - - -void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z) -{ +void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, + gsl_vector *z) { double d1, d2, z_rand=0.0; for (size_t i=0; i<z->size; ++i) { d1=gsl_vector_get (y, i); d2=gsl_vector_get (z_hat, i); - //y is centerred for case control studies + + // y is centered for case control studies. if (d1<=0.0) { - //control, right truncated + + // Control, right truncated. do { z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); } while (z_rand>0.0); @@ -648,12 +650,8 @@ void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z return; } - - - - -double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) -{ +double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { double h=cHyp_old.h, rho=cHyp_old.rho; @@ -668,30 +666,13 @@ double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp if (rho<rho_min) {rho=2*rho_min-rho;} if (rho>rho_max) {rho=2*rho_max-rho;} } - /* - //Grid Sampling - for (size_t i=0; i<repeat; ++i) { - if (gsl_rng_uniform(gsl_r)<0.66) {continue;} - h=h+(gsl_rng_uniform_int(gsl_r, 2)-0.5)*0.1; - if (h<h_min) {h=h_max;} - if (h>h_max) {h=h_min;} - } - - for (size_t i=0; i<repeat; ++i) { - if (gsl_rng_uniform(gsl_r)<0.66) {continue;} - rho=rho+(gsl_rng_uniform_int(gsl_r, 2)-0.5)*0.1; - if (rho<rho_min) {rho=rho_max;} - if (rho>rho_max) {rho=rho_min;} - } - */ cHyp_new.h=h; cHyp_new.rho=rho; return 0.0; } - -double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) -{ +double 
BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp; double log_ratio=0.0; @@ -700,36 +681,25 @@ double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_ne for (size_t i=0; i<repeat; ++i) { logp_new=logp_old+(gsl_rng_uniform(gsl_r)-0.5)*d_logp; if (logp_new<logp_min) {logp_new=2*logp_min-logp_new;} - if (logp_new>logp_max) {logp_new=2*logp_max-logp_new;} - + if (logp_new>logp_max) {logp_new=2*logp_max-logp_new;} log_ratio+=logp_new-logp_old; logp_old=logp_new; } - /* - //Grid Sampling - for (size_t i=0; i<repeat; ++i) { - if (gsl_rng_uniform(gsl_r)<0.66) {continue;} - logp_new=logp_old+(gsl_rng_uniform_int(gsl_r, 2)-0.5)*0.5*log(10.0); - if (logp_new<logp_min) {logp_new=logp_max;} - if (logp_new>logp_max) {logp_new=logp_min;} - - log_ratio+=logp_new-logp_old; - logp_old=logp_new; - } - */ cHyp_new.logp=logp_new; return log_ratio; } -bool comp_vec (size_t a, size_t b) -{ +bool comp_vec (size_t a, size_t b) { return (a < b); } - -double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank_new, const double *p_gamma, const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) -{ +double BSLMM::ProposeGamma (const vector<size_t> &rank_old, + vector<size_t> &rank_new, + const double *p_gamma, + const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, + const size_t &repeat) { map<size_t, int> mapRank2in; size_t r; double unif, logp=0.0; @@ -752,11 +722,19 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank unif=gsl_rng_uniform(gsl_r); if (unif < 0.40 && cHyp_new.n_gamma<s_max) {flag_gamma=1;} - else if (unif>=0.40 && unif < 0.80 && cHyp_new.n_gamma>s_min) {flag_gamma=2;} - else if (unif>=0.80 && cHyp_new.n_gamma>0 && cHyp_new.n_gamma<ns_test) {flag_gamma=3;} + else if (unif>=0.40 && unif < 0.80 && + cHyp_new.n_gamma>s_min) { + flag_gamma=2; + } + else if (unif>=0.80 && 
cHyp_new.n_gamma>0 && + cHyp_new.n_gamma<ns_test) { + flag_gamma=3; + } else {flag_gamma=4;} - if(flag_gamma==1) {//add a snp; + if(flag_gamma==1) { + + // Add a SNP. do { r_add=gsl_ran_discrete (gsl_r, gsl_t); } while (mapRank2in.count(r_add)!=0); @@ -770,10 +748,13 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank mapRank2in[r_add]=1; rank_new.push_back(r_add); cHyp_new.n_gamma++; - logp+=-log(p_gamma[r_add]/prob_total)-log((double)cHyp_new.n_gamma); + logp+=-log(p_gamma[r_add]/prob_total)- + log((double)cHyp_new.n_gamma); } - else if (flag_gamma==2) {//delete a snp; - col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); + else if (flag_gamma==2) { + + // Delete a SNP. + col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); r_remove=rank_new[col_id]; double prob_total=1.0; @@ -785,13 +766,17 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank mapRank2in.erase(r_remove); rank_new.erase(rank_new.begin()+col_id); - logp+=log(p_gamma[r_remove]/prob_total)+log((double)cHyp_new.n_gamma); + logp+=log(p_gamma[r_remove]/prob_total)+ + log((double)cHyp_new.n_gamma); cHyp_new.n_gamma--; } - else if (flag_gamma==3) {//switch a snp; - col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); + else if (flag_gamma==3) { + + // Switch a SNP. + col_id=gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); r_remove=rank_new[col_id]; - //careful with the proposal + + // Be careful with the proposal. 
do { r_add=gsl_ran_discrete (gsl_r, gsl_t); } while (mapRank2in.count(r_add)!=0); @@ -802,7 +787,8 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank prob_total-=p_gamma[r]; } - logp+=log(p_gamma[r_remove]/(prob_total+p_gamma[r_remove]-p_gamma[r_add]) ); + logp+=log(p_gamma[r_remove]/ + (prob_total+p_gamma[r_remove]-p_gamma[r_add])); logp-=log(p_gamma[r_add]/prob_total); mapRank2in.erase(r_remove); @@ -810,7 +796,7 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank rank_new.erase(rank_new.begin()+col_id); rank_new.push_back(r_add); } - else {logp+=0;}//do not change + else {logp+=0;} // Do not change. } stable_sort (rank_new.begin(), rank_new.end(), comp_vec); @@ -819,24 +805,14 @@ double BSLMM::ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank return logp; } - - - - - -bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) -{ +bool comp_lr (pair<size_t, double> a, pair<size_t, double> b) { return (a.second > b.second); } - - - - - - -//if a_mode==13 then Uty==y -void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) { +// If a_mode==13 then Uty==y. +void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y) { clock_t time_start; class HYPBSLMM cHyp_old, cHyp_new; @@ -890,9 +866,10 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * mapRank2pos[i]=pos_loglr[i].first; } - //calculate proposal distribution for gamma (unnormalized), and set up gsl_r and gsl_t - gsl_rng_env_setup(); - const gsl_rng_type * gslType; + // Calculate proposal distribution for gamma (unnormalized), + // and set up gsl_r and gsl_t. 
+ gsl_rng_env_setup(); + const gsl_rng_type * gslType; gslType = gsl_rng_default; if (randseed<0) { @@ -900,7 +877,8 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * time (&rawtime); tm * ptm = gmtime (&rawtime); - randseed = (unsigned) (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec); + randseed = (unsigned) (ptm->tm_hour%24*3600+ + ptm->tm_min*60+ptm->tm_sec); } gsl_r = gsl_rng_alloc(gslType); gsl_rng_set(gsl_r, randseed); @@ -910,17 +888,14 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - //initial parameters + // Initial parameters. InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr); -// if (fix_sigma>=0) { -// rho_max=1-fix_sigma; -// cHyp_old.h=fix_sigma/(1-cHyp_old.rho); -// } cHyp_initial=cHyp_old; if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); + logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, + cHyp_old); beta_old.clear(); for (size_t i=0; i<cHyp_old.n_gamma; ++i) { @@ -928,10 +903,12 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * } } else { - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_old.n_gamma); + gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, + cHyp_old.n_gamma); gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma); SetXgamma (UtXgamma, UtX, rank_old); - logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); + logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, + Utu_old, alpha_old, beta, cHyp_old); beta_old.clear(); for (size_t i=0; i<beta->size; ++i) { @@ -941,7 +918,7 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * gsl_vector_free (beta); } - //calculate centered z_hat, and pve + // Calculate centered z_hat, and pve. 
if (a_mode==13) { time_start=clock(); if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { @@ -953,15 +930,17 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); } - //start MCMC + // Start MCMC. int accept; size_t total_step=w_step+s_step; size_t w=0, w_col, pos; size_t repeat=0; for (size_t t=0; t<total_step; ++t) { - if (t%d_pace==0 || t==total_step-1) {ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1));} -// if (t>10) {break;} + if (t%d_pace==0 || t==total_step-1) { + ProgressBar ("Running MCMC ", t, total_step-1, + (double)n_accept/(double)(t*n_mh+1)); + } if (a_mode==13) { SampleZ (y, z_hat, z); @@ -969,60 +948,75 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * time_start=clock(); gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz); - time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_UtZ+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); - //First proposal - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); + // First proposal. 
+ if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { + logPost_old= + CalcPosterior(Utz, K_eval, Utu_old, + alpha_old, cHyp_old); beta_old.clear(); for (size_t i=0; i<cHyp_old.n_gamma; ++i) { beta_old.push_back(0); } } else { - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_old.n_gamma); - gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma); + gsl_matrix *UtXgamma= + gsl_matrix_alloc (ni_test, cHyp_old.n_gamma); + gsl_vector *beta= + gsl_vector_alloc (cHyp_old.n_gamma); SetXgamma (UtXgamma, UtX, rank_old); - logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); + logPost_old= + CalcPosterior(UtXgamma, Utz, K_eval, + UtXb_old, Utu_old, alpha_old, + beta, cHyp_old); beta_old.clear(); for (size_t i=0; i<beta->size; ++i) { - beta_old.push_back(gsl_vector_get(beta, i)); + beta_old.push_back(gsl_vector_get(beta, i)); } gsl_matrix_free (UtXgamma); gsl_vector_free (beta); } } - //MH steps + // M-H steps. for (size_t i=0; i<n_mh; ++i) { - if (gsl_rng_uniform(gsl_r)<0.33) {repeat = 1+gsl_rng_uniform_int(gsl_r, 20);} - else {repeat=1;} + if (gsl_rng_uniform(gsl_r)<0.33) { + repeat = 1+gsl_rng_uniform_int(gsl_r, 20); + } + else { + repeat=1; + } logMHratio=0.0; - logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat); - logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); + logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat); + logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma, + cHyp_old, cHyp_new, repeat); logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat); -// if (fix_sigma>=0) { -// cHyp_new.h=fix_sigma/(1-cHyp_new.rho); -// } - if (cHyp_new.n_gamma==0 || cHyp_new.rho==0) { - logPost_new=CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new); + logPost_new=CalcPosterior(Utz, K_eval, Utu_new, + alpha_new, cHyp_new); beta_new.clear(); for (size_t i=0; i<cHyp_new.n_gamma; ++i) { beta_new.push_back(0); } } else { - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_new.n_gamma); - gsl_vector 
*beta=gsl_vector_alloc (cHyp_new.n_gamma); + gsl_matrix *UtXgamma= + gsl_matrix_alloc (ni_test, cHyp_new.n_gamma); + gsl_vector *beta= + gsl_vector_alloc (cHyp_new.n_gamma); SetXgamma (UtXgamma, UtX, rank_new); - logPost_new=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new, alpha_new, beta, cHyp_new); + logPost_new= + CalcPosterior(UtXgamma, Utz, K_eval, + UtXb_new, Utu_new, alpha_new, + beta, cHyp_new); beta_new.clear(); for (size_t i=0; i<beta->size; ++i) { - beta_new.push_back(gsl_vector_get(beta, i)); + beta_new.push_back(gsl_vector_get(beta, i)); } gsl_matrix_free (UtXgamma); gsl_vector_free (beta); @@ -1030,17 +1024,20 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * logMHratio+=logPost_new-logPost_old; - if (logMHratio>0 || log(gsl_rng_uniform(gsl_r))<logMHratio) {accept=1; n_accept++;} + if (logMHratio>0 || + log(gsl_rng_uniform(gsl_r))<logMHratio) { + accept=1; n_accept++; + } else {accept=0;} if (accept==1) { logPost_old=logPost_new; rank_old.clear(); beta_old.clear(); if (rank_new.size()!=0) { - for (size_t i=0; i<rank_new.size(); ++i) { - rank_old.push_back(rank_new[i]); - beta_old.push_back(beta_new[i]); - } + for (size_t i=0; i<rank_new.size(); ++i) { + rank_old.push_back(rank_new[i]); + beta_old.push_back(beta_new[i]); + } } cHyp_old=cHyp_new; gsl_vector_memcpy (alpha_old, alpha_new); @@ -1050,51 +1047,58 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * else {cHyp_new=cHyp_old;} } - //calculate z_hat, and pve + // Calculate z_hat, and pve. if (a_mode==13) { time_start=clock(); if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old); } else { - CalcCC_PVEnZ (U, UtXb_old, Utu_old, z_hat, cHyp_old); + CalcCC_PVEnZ (U, UtXb_old, Utu_old, + z_hat, cHyp_old); } - //sample mu and update z hat + // Sample mu and update z_hat. 
gsl_vector_sub (z, z_hat); mean_z+=CenterVector(z); - mean_z+=gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test) ); - + mean_z+= + gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test)); gsl_vector_add_constant (z_hat, mean_z); - time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_UtZ+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); } - //Save data + // Save data. if (t<w_step) {continue;} else { if (t%r_pace==0) { w_col=w%w_pace; if (w_col==0) { - if (w==0) {WriteResult (0, Result_hyp, Result_gamma, w_col);} + if (w==0) { + WriteResult (0, Result_hyp, + Result_gamma, w_col); + } else { - WriteResult (1, Result_hyp, Result_gamma, w_col); - gsl_matrix_set_zero (Result_hyp); - gsl_matrix_set_zero (Result_gamma); + WriteResult (1, Result_hyp, + Result_gamma, w_col); + gsl_matrix_set_zero (Result_hyp); + gsl_matrix_set_zero (Result_gamma); } } - gsl_matrix_set (Result_hyp, w_col, 0, cHyp_old.h); - gsl_matrix_set (Result_hyp, w_col, 1, cHyp_old.pve); - gsl_matrix_set (Result_hyp, w_col, 2, cHyp_old.rho); - gsl_matrix_set (Result_hyp, w_col, 3, cHyp_old.pge); - gsl_matrix_set (Result_hyp, w_col, 4, cHyp_old.logp); - gsl_matrix_set (Result_hyp, w_col, 5, cHyp_old.n_gamma); + gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h); + gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve); + gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho); + gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge); + gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp); + gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma); for (size_t i=0; i<cHyp_old.n_gamma; ++i) { pos=mapRank2pos[rank_old[i]]+1; - gsl_matrix_set (Result_gamma, w_col, i, pos); + gsl_matrix_set(Result_gamma,w_col,i, + pos); beta_g[pos-1].first+=beta_old[i]; beta_g[pos-1].second+=1.0; @@ -1138,7 +1142,8 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * } gsl_vector *alpha=gsl_vector_alloc (ns_test); - gsl_blas_dgemv (CblasTrans, 1.0/(double)ns_test, UtX, alpha_prime, 0.0, alpha); + gsl_blas_dgemv 
(CblasTrans, 1.0/(double)ns_test, UtX, + alpha_prime, 0.0, alpha); WriteParam (beta_g, alpha, w); gsl_vector_free(alpha); @@ -1154,10 +1159,9 @@ void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector * return; } - - -void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *eval, const double lambda) -{ +void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *eval, + const double lambda) { gsl_vector *beta=gsl_vector_alloc (UtX->size2); gsl_vector *H_eval=gsl_vector_alloc (Uty->size); gsl_vector *bv=gsl_vector_alloc (Uty->size); @@ -1169,7 +1173,8 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector gsl_vector_memcpy (bv, Uty); gsl_vector_div (bv, H_eval); - gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2, UtX, bv, 0.0, beta); + gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2, + UtX, bv, 0.0, beta); gsl_vector_add_constant (H_eval, -1.0); gsl_vector_mul (H_eval, bv); gsl_blas_dgemv (CblasNoTrans, 1.0, U, H_eval, 0.0, bv); @@ -1183,28 +1188,13 @@ void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector return; } - - - - - - - - - - - - - - - - -//below fits MCMC for rho=1 -void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) -{ +// Below fits MCMC for rho=1. 
+void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, + const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { time_t time_start=clock(); - gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size); + gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, + s_size); gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size); @@ -1217,29 +1207,34 @@ void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_si return; } - -void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_matrix *XtX_old, const gsl_vector *Xty_old, const gsl_vector *y, const vector<size_t> &rank_old, const vector<size_t> &rank_new, gsl_matrix *X_new, gsl_matrix *XtX_new, gsl_vector *Xty_new) -{ +void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, + const gsl_matrix *XtX_old, const gsl_vector *Xty_old, + const gsl_vector *y, const vector<size_t> &rank_old, + const vector<size_t> &rank_new, gsl_matrix *X_new, + gsl_matrix *XtX_new, gsl_vector *Xty_new) { double d; - //rank_old and rank_new are sorted already inside PorposeGamma - //calculate vectors rank_remove and rank_add - // size_t v_size=max(rank_old.size(), rank_new.size()); - //make sure that v_size is larger than repeat + // rank_old and rank_new are sorted already inside PorposeGamma + // calculate vectors rank_remove and rank_add. + // make sure that v_size is larger than repeat. 
size_t v_size=20; - vector<size_t> rank_remove(v_size), rank_add(v_size), rank_union(s_max+v_size); + vector<size_t> rank_remove(v_size), rank_add(v_size), + rank_union(s_max+v_size); vector<size_t>::iterator it; - it=set_difference (rank_old.begin(), rank_old.end(), rank_new.begin(), rank_new.end(), rank_remove.begin()); + it=set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(), + rank_new.end(), rank_remove.begin()); rank_remove.resize(it-rank_remove.begin()); - it=set_difference (rank_new.begin(), rank_new.end(), rank_old.begin(), rank_old.end(), rank_add.begin()); + it=set_difference (rank_new.begin(), rank_new.end(), rank_old.begin(), + rank_old.end(), rank_add.begin()); rank_add.resize(it-rank_add.begin()); - it=set_union (rank_new.begin(), rank_new.end(), rank_old.begin(), rank_old.end(), rank_union.begin()); + it=set_union (rank_new.begin(), rank_new.end(), rank_old.begin(), + rank_old.end(), rank_union.begin()); rank_union.resize(it-rank_union.begin()); - //map rank_remove and rank_add + // Map rank_remove and rank_add. 
map<size_t, int> mapRank2in_remove, mapRank2in_add; for (size_t i=0; i<rank_remove.size(); i++) { mapRank2in_remove[rank_remove[i]]=1; @@ -1248,16 +1243,23 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m mapRank2in_add[rank_add[i]]=1; } - //obtain the subset of matrix/vector - gsl_matrix_const_view Xold_sub=gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size()); - gsl_matrix_const_view XtXold_sub=gsl_matrix_const_submatrix(XtX_old, 0, 0, rank_old.size(), rank_old.size()); - gsl_vector_const_view Xtyold_sub=gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); - - gsl_matrix_view Xnew_sub=gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); - gsl_matrix_view XtXnew_sub=gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); - gsl_vector_view Xtynew_sub=gsl_vector_subvector(Xty_new, 0, rank_new.size()); - - //get X_new and calculate XtX_new + // Obtain the subset of matrix/vector. + gsl_matrix_const_view Xold_sub= + gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size()); + gsl_matrix_const_view XtXold_sub= + gsl_matrix_const_submatrix(XtX_old, 0, 0, rank_old.size(), + rank_old.size()); + gsl_vector_const_view Xtyold_sub= + gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); + + gsl_matrix_view Xnew_sub= + gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); + gsl_matrix_view XtXnew_sub= + gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtynew_sub= + gsl_vector_subvector(Xty_new, 0, rank_new.size()); + + // Get X_new and calculate XtX_new. 
if (rank_remove.size()==0 && rank_add.size()==0) { gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix); gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix); @@ -1295,13 +1297,13 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m gsl_matrix *XtX_ao=gsl_matrix_alloc(X_add->size2, X_old->size2); gsl_vector *Xty_add=gsl_vector_alloc(X_add->size2); - //get X_add + // Get X_add. SetXgamma (X_add, X, rank_add); - //get t(X_add)X_add and t(X_add)X_temp + // Get t(X_add)X_add and t(X_add)X_temp. clock_t time_start=clock(); - //somehow the lapack_dgemm does not work here + // Somehow the lapack_dgemm does not work here. gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old, @@ -1310,18 +1312,26 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //save to X_new, XtX_new and Xty_new + // Save to X_new, XtX_new and Xty_new. 
i_old=0; i_new=0; i_add=0; for (size_t i=0; i<rank_union.size(); i++) { - if (mapRank2in_remove.count(rank_old[i_old])!=0) {i_old++; continue;} - if (mapRank2in_add.count(rank_new[i_new])!=0) {i_flag=1;} else {i_flag=0;} + if (mapRank2in_remove.count(rank_old[i_old])!=0) { + i_old++; + continue; + } + if (mapRank2in_add.count(rank_new[i_new])!=0) { + i_flag=1; + } else { + i_flag=0; + } gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i_new); if (i_flag==1) { gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i_add); gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); } else { - gsl_vector_const_view Xcopy_col=gsl_matrix_const_column(X_old, i_old); + gsl_vector_const_view Xcopy_col= + gsl_matrix_const_column(X_old, i_old); gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); } @@ -1334,8 +1344,15 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m j_old=i_old; j_new=i_new; j_add=i_add; for (size_t j=i; j<rank_union.size(); j++) { - if (mapRank2in_remove.count(rank_old[j_old])!=0) {j_old++; continue;} - if (mapRank2in_add.count(rank_new[j_new])!=0) {j_flag=1;} else {j_flag=0;} + if (mapRank2in_remove.count(rank_old[j_old])!=0) { + j_old++; + continue; + } + if (mapRank2in_add.count(rank_new[j_new])!=0) { + j_flag=1; + } else { + j_flag=0; + } if (i_flag==1 && j_flag==1) { d=gsl_matrix_get(XtX_aa, i_add, j_add); @@ -1372,37 +1389,40 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m return; } - -double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) -{ +double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) { double logpost=0.0; - //for quantitative traits, calculate pve and pge - //pve and pge for case/control data are calculted in CalcCC_PVEnZ + // For quantitative traits, calculate pve and pge. + // Pve and pge for case/control data are calculted in CalcCC_PVEnZ. if (a_mode==11) { cHyp.pve=0.0; cHyp.pge=1.0; } - //calculate likelihood + // Calculate likelihood. 
if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);} else {logpost-=0.5*yty;} - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); + logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ + ((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); return logpost; } - -double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp) -{ +double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, + const gsl_vector *Xty, const double yty, + const size_t s_size, gsl_vector *Xb, + gsl_vector *beta, class HYPBSLMM &cHyp) { double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); double logpost=0.0; double d, P_yy=yty, logdet_O=0.0; - gsl_matrix_const_view Xgamma_sub=gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size); - gsl_matrix_const_view XtX_sub=gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size); - gsl_vector_const_view Xty_sub=gsl_vector_const_subvector (Xty, 0, s_size); + gsl_matrix_const_view Xgamma_sub= + gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size); + gsl_matrix_const_view XtX_sub= + gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size); + gsl_vector_const_view Xty_sub= + gsl_vector_const_subvector (Xty, 0, s_size); gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size); gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size); @@ -1411,38 +1431,42 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, co gsl_vector_memcpy (Xty_temp, &Xty_sub.vector); - //calculate Omega + // Calculate Omega. gsl_matrix_memcpy (Omega, &XtX_sub.matrix); gsl_matrix_scale (Omega, sigma_a2); gsl_matrix_set_identity (M_temp); gsl_matrix_add (Omega, M_temp); - //calculate beta_hat + // Calculate beta_hat. 
logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat); gsl_vector_scale (beta_hat, sigma_a2); gsl_blas_ddot (Xty_temp, beta_hat, &d); P_yy-=d; - //sample tau + // Sample tau. double tau=1.0; - if (a_mode==11) {tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); } + if (a_mode==11) { + tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); + } - //sample beta + // Sample beta. for (size_t i=0; i<s_size; i++) { d=gsl_ran_gaussian(gsl_r, 1); gsl_vector_set(beta, i, d); } gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size); - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, &beta_sub.vector); + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, + &beta_sub.vector); - //it compuates inv(L^T(Omega)) %*% beta; + // This computes inv(L^T(Omega)) %*% beta. gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau)); gsl_vector_add(&beta_sub.vector, beta_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, Xb); + gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, + &beta_sub.vector, 0.0, Xb); - //for quantitative traits, calculate pve and pge + // For quantitative traits, calculate pve and pge. if (a_mode==11) { gsl_blas_ddot (Xb, Xb, &d); cHyp.pve=d/(double)ni_test; @@ -1454,7 +1478,8 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, co if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} else {logpost-=0.5*P_yy;} - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); + logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ + ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); gsl_matrix_free (Omega); gsl_matrix_free (M_temp); @@ -1464,9 +1489,7 @@ double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, co return logpost; } - - -//calculate pve and pge, and calculate z_hat for case-control data +// Calculate pve and pge, and calculate z_hat for case-control data. 
void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) { gsl_vector_set_zero(z_hat); @@ -1475,10 +1498,9 @@ void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) return; } - -//calculate pve and pge, and calculate z_hat for case-control data -void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, class HYPBSLMM &cHyp) -{ +// Calculate pve and pge, and calculate z_hat for case-control data. +void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, + class HYPBSLMM &cHyp) { double d; gsl_blas_ddot (Xb, Xb, &d); @@ -1491,9 +1513,7 @@ void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, class HYPBSLM return; } - - -//if a_mode==13, then run probit model +// If a_mode==13, then run probit model. void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { clock_t time_start; double time_set=0, time_post=0; @@ -1520,7 +1540,9 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { double ztz=0.0; gsl_vector_memcpy (z, y); - //for quantitative traits, y is centered already in gemma.cpp, but just in case + + // For quantitative traits, y is centered already in + // gemma.cpp, but just in case. double mean_z=CenterVector (z); gsl_blas_ddot(z, z, &ztz); @@ -1549,9 +1571,10 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { mapRank2pos[i]=pos_loglr[i].first; } - //calculate proposal distribution for gamma (unnormalized), and set up gsl_r and gsl_t + // Calculate proposal distribution for gamma (unnormalized), + // and set up gsl_r and gsl_t. 
gsl_rng_env_setup(); - const gsl_rng_type * gslType; + const gsl_rng_type * gslType; gslType = gsl_rng_default; if (randseed<0) { @@ -1559,7 +1582,8 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { time (&rawtime); tm * ptm = gmtime (&rawtime); - randseed = (unsigned) (ptm->tm_hour%24*3600+ptm->tm_min*60+ptm->tm_sec); + randseed = (unsigned) (ptm->tm_hour%24*3600+ + ptm->tm_min*60+ptm->tm_sec); } gsl_r = gsl_rng_alloc(gslType); gsl_rng_set(gsl_r, randseed); @@ -1569,7 +1593,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - //initial parameters + // Initial parameters. InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr); cHyp_initial=cHyp_old; @@ -1580,10 +1604,12 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { else { SetXgamma (Xgamma_old, X, rank_old); CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); - logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), Xb_old, beta_old, cHyp_old); + logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, + rank_old.size(), Xb_old, beta_old, + cHyp_old); } - //calculate centered z_hat, and pve + // Calculate centered z_hat, and pve. if (a_mode==13) { if (cHyp_old.n_gamma==0) { CalcCC_PVEnZ (z_hat, cHyp_old); @@ -1593,65 +1619,94 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { } } - //start MCMC + // Start MCMC. int accept; size_t total_step=w_step+s_step; size_t w=0, w_col, pos; size_t repeat=0; for (size_t t=0; t<total_step; ++t) { - if (t%d_pace==0 || t==total_step-1) {ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1));} -// if (t>10) {break;} + if (t%d_pace==0 || t==total_step-1) { + ProgressBar ("Running MCMC ", t, total_step-1, + (double)n_accept/(double)(t*n_mh+1)); + } + if (a_mode==13) { SampleZ (y, z_hat, z); mean_z=CenterVector (z); gsl_blas_ddot(z,z,&ztz); - //First proposal + // First proposal. 
if (cHyp_old.n_gamma==0) { logPost_old=CalcPosterior (ztz, cHyp_old); } else { - gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size()); - gsl_vector_view Xtz_sub=gsl_vector_subvector(Xtz_old, 0, rank_old.size()); - gsl_blas_dgemv (CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0, &Xtz_sub.vector); - logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), Xb_old, beta_old, cHyp_old); + gsl_matrix_view Xold_sub= + gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, + rank_old.size()); + gsl_vector_view Xtz_sub= + gsl_vector_subvector(Xtz_old, 0, rank_old.size()); + gsl_blas_dgemv (CblasTrans, 1.0, &Xold_sub.matrix, + z, 0.0, &Xtz_sub.vector); + logPost_old= + CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, + rank_old.size(), Xb_old, beta_old, + cHyp_old); } } - //MH steps + // M-H steps. for (size_t i=0; i<n_mh; ++i) { - if (gsl_rng_uniform(gsl_r)<0.33) {repeat = 1+gsl_rng_uniform_int(gsl_r, 20);} + if (gsl_rng_uniform(gsl_r)<0.33) { + repeat = 1+gsl_rng_uniform_int(gsl_r, 20); + } else {repeat=1;} logMHratio=0.0; - logMHratio+=ProposeHnRho(cHyp_old, cHyp_new, repeat); - logMHratio+=ProposeGamma (rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); + logMHratio+= + ProposeHnRho(cHyp_old, cHyp_new, repeat); + logMHratio+= + ProposeGamma (rank_old, rank_new, p_gamma, + cHyp_old, cHyp_new, repeat); logMHratio+=ProposePi(cHyp_old, cHyp_new, repeat); if (cHyp_new.n_gamma==0) { logPost_new=CalcPosterior (ztz, cHyp_new); } else { - //this if makes sure that rank_old.size()==rank_remove.size() does not happen + + // This makes sure that rank_old.size() == + // rank_remove.size() does not happen. 
if (cHyp_new.n_gamma<=20 || cHyp_old.n_gamma<=20) { time_start=clock(); SetXgamma (Xgamma_new, X, rank_new); - CalcXtX (Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new); - time_set+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + CalcXtX (Xgamma_new, z, rank_new.size(), + XtX_new, Xtz_new); + time_set+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); } else { time_start=clock(); - SetXgamma (X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new, Xgamma_new, XtX_new, Xtz_new); - time_set+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + SetXgamma (X, Xgamma_old, XtX_old, Xtz_old, z, + rank_old, rank_new, Xgamma_new, + XtX_new, Xtz_new); + time_set+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); } time_start=clock(); - logPost_new=CalcPosterior (Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(), Xb_new, beta_new, cHyp_new); - time_post+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + logPost_new= + CalcPosterior (Xgamma_new, XtX_new, Xtz_new, ztz, + rank_new.size(), Xb_new, beta_new, + cHyp_new); + time_post+=(clock()-time_start)/ + (double(CLOCKS_PER_SEC)*60.0); } logMHratio+=logPost_new-logPost_old; - if (logMHratio>0 || log(gsl_rng_uniform(gsl_r))<logMHratio) {accept=1; n_accept++;} + if (logMHratio>0 || + log(gsl_rng_uniform(gsl_r))<logMHratio) { + accept=1; + n_accept++; + } else {accept=0;} - //cout<<rank_new.size()<<"\t"<<rank_old.size()<<"\t"<<logPost_new<<"\t"<<logPost_old<<endl; if (accept==1) { logPost_old=logPost_new; cHyp_old=cHyp_new; @@ -1659,8 +1714,10 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { rank_old.clear(); if (rank_new.size()!=0) { - for (size_t i=0; i<rank_new.size(); ++i) { - rank_old.push_back(rank_new[i]); + for (size_t i=0; + i<rank_new.size(); + ++i) { + rank_old.push_back(rank_new[i]); } gsl_matrix_view Xold_sub=gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size()); @@ -1684,7 +1741,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { } - //calculate z_hat, 
and pve + // Calculate z_hat, and pve. if (a_mode==13) { if (cHyp_old.n_gamma==0) { CalcCC_PVEnZ (z_hat, cHyp_old); @@ -1693,41 +1750,47 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old); } - //sample mu and update z hat + // Sample mu and update z_hat. gsl_vector_sub (z, z_hat); mean_z+=CenterVector(z); - mean_z+=gsl_ran_gaussian(gsl_r, sqrt(1.0/(double) ni_test) ); + mean_z+=gsl_ran_gaussian(gsl_r, + sqrt(1.0/(double) ni_test)); gsl_vector_add_constant (z_hat, mean_z); } - //Save data + // Save data. if (t<w_step) {continue;} else { if (t%r_pace==0) { w_col=w%w_pace; if (w_col==0) { - if (w==0) {WriteResult (0, Result_hyp, Result_gamma, w_col);} + if (w==0) { + WriteResult(0,Result_hyp, + Result_gamma,w_col); + } else { - WriteResult (1, Result_hyp, Result_gamma, w_col); - gsl_matrix_set_zero (Result_hyp); - gsl_matrix_set_zero (Result_gamma); + WriteResult(1,Result_hyp, + Result_gamma,w_col); + gsl_matrix_set_zero (Result_hyp); + gsl_matrix_set_zero (Result_gamma); } } - gsl_matrix_set (Result_hyp, w_col, 0, cHyp_old.h); - gsl_matrix_set (Result_hyp, w_col, 1, cHyp_old.pve); - gsl_matrix_set (Result_hyp, w_col, 2, cHyp_old.rho); - gsl_matrix_set (Result_hyp, w_col, 3, cHyp_old.pge); - gsl_matrix_set (Result_hyp, w_col, 4, cHyp_old.logp); - gsl_matrix_set (Result_hyp, w_col, 5, cHyp_old.n_gamma); + gsl_matrix_set(Result_hyp,w_col,0,cHyp_old.h); + gsl_matrix_set(Result_hyp,w_col,1,cHyp_old.pve); + gsl_matrix_set(Result_hyp,w_col,2,cHyp_old.rho); + gsl_matrix_set(Result_hyp,w_col,3,cHyp_old.pge); + gsl_matrix_set(Result_hyp,w_col,4,cHyp_old.logp); + gsl_matrix_set(Result_hyp,w_col,5,cHyp_old.n_gamma); for (size_t i=0; i<cHyp_old.n_gamma; ++i) { pos=mapRank2pos[rank_old[i]]+1; - - gsl_matrix_set (Result_gamma, w_col, i, pos); + gsl_matrix_set(Result_gamma,w_col, + i,pos); - beta_g[pos-1].first+=gsl_vector_get(beta_old, i); + beta_g[pos-1].first+= + gsl_vector_get(beta_old, i); beta_g[pos-1].second+=1.0; } @@ 
-1736,9 +1799,7 @@ void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { } w++; - } - } } cout<<endl; diff --git a/src/bslmm.h b/src/bslmm.h index 07aac67..da185fa 100644 --- a/src/bslmm.h +++ b/src/bslmm.h @@ -40,96 +40,140 @@ public: string file_out; string path_out; - // LMM related parameters + // LMM-related parameters. double l_min; double l_max; size_t n_region; double pve_null; double pheno_mean; - // BSLMM MCMC related parameters - double h_min, h_max, h_scale; //priors for h - double rho_min, rho_max, rho_scale; //priors for rho - double logp_min, logp_max, logp_scale; //priors for log(pi) - size_t s_min, s_max; //minimum and maximum number of gammas - size_t w_step; //number of warm up/burn in iterations - size_t s_step; //number of sampling iterations - size_t r_pace; //record pace - size_t w_pace; //write pace - size_t n_accept; //number of acceptance - size_t n_mh; //number of MH steps within each iteration - double geo_mean; //mean of the geometric distribution + // BSLMM MCMC-related parameters + double h_min, h_max, h_scale; // Priors for h. + double rho_min, rho_max, rho_scale; // Priors for rho. + double logp_min, logp_max, logp_scale; // Priors for log(pi). + size_t s_min, s_max; // Min. & max. number of gammas. + size_t w_step; // Number of warm up/burn in + // iterations. + size_t s_step; // Num. sampling iterations. + size_t r_pace; // Record pace. + size_t w_pace; // Write pace. + size_t n_accept; // Number of acceptances. + size_t n_mh; // Number of MH steps per iter. + double geo_mean; // Mean of geometric dist. long int randseed; double trace_G; HYPBSLMM cHyp_initial; - // Summary statistics - size_t ni_total, ns_total; //number of total individuals and snps - size_t ni_test, ns_test; //number of individuals and snps used for analysis - size_t n_cvt; //number of covariates + // Summary statistics. + size_t ni_total, ns_total; // Number of total individuals and SNPs + size_t ni_test, ns_test; // Num. 
individuals & SNPs used in analysis. + size_t n_cvt; // Number of covariates. double time_UtZ; - double time_Omega; //time spent on optimization iterations - double time_Proposal; //time spent on constructing the proposal distribution for gamma (i.e. lmm or lm analysis) - vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis - vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis - - vector<SNPINFO> snpInfo; //record SNP information + double time_Omega; // Time spent on optimization iterations. + + // Time spent on constructing the proposal distribution for + // gamma (i.e. lmm or lm analysis). + double time_Proposal; + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + // Record SNP information. + vector<SNPINFO> snpInfo; - // Not included in PARAM + // Not included in PARAM. gsl_rng *gsl_r; gsl_ran_discrete_t *gsl_t; map<size_t, size_t> mapRank2pos; - // Main Functions + // Main functions. 
void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); - void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *eval, const double lambda); + void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *eval, + const double lambda); - void MCMC (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y); + void MCMC (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y); void WriteLog (); void WriteLR (); void WriteBV (const gsl_vector *bv); - void WriteParam (vector<pair<double, double> > &beta_g, const gsl_vector *alpha, const size_t w); + void WriteParam (vector<pair<double, double> > &beta_g, + const gsl_vector *alpha, const size_t w); void WriteParam (const gsl_vector *alpha); - void WriteResult (const int flag, const gsl_matrix *Result_hyp, const gsl_matrix *Result_gamma, const size_t w_col); + void WriteResult (const int flag, const gsl_matrix *Result_hyp, + const gsl_matrix *Result_gamma, const size_t w_col); - //Subfunctions inside MCMC + // Subfunctions inside MCMC. 
void CalcPgamma (double *p_gammar); - double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const double sigma_a2); - void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, vector<size_t> &rank_old, class HYPBSLMM &cHyp, vector<pair<size_t, double> > &pos_loglr); - double CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, gsl_vector *Utu, gsl_vector *alpha_prime, class HYPBSLMM &cHyp); - double CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, gsl_vector *UtXb, gsl_vector *Utu, gsl_vector *alpha_prime, gsl_vector *beta, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp); - double CalcREMLE (const gsl_matrix *Utw, const gsl_vector *Uty, const gsl_vector *K_eval); - double CalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, vector<pair<size_t, double> > &loglr_sort); //calculate the maximum marginal likelihood ratio for each analyzed SNPs with gemma, use it to rank SNPs - void SampleZ (const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z); - double ProposeHnRho (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat); - double ProposePi (const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat); - double ProposeGamma (const vector<size_t> &rank_old, vector<size_t> &rank_new, const double *p_gamma, const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat); - void SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank); - - void CalcXtX (const gsl_matrix *X_new, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX_new, gsl_vector *Xty_new); - void SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_matrix *XtX_old, const 
gsl_vector *Xty_old, const gsl_vector *y, const vector<size_t> &rank_old, const vector<size_t> &rank_new, gsl_matrix *X_new, gsl_matrix *XtX_new, gsl_vector *Xty_new); + double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const double sigma_a2); + void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, + vector<size_t> &rank_old, class HYPBSLMM &cHyp, + vector<pair<size_t, double> > &pos_loglr); + double CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *Utu, gsl_vector *alpha_prime, + class HYPBSLMM &cHyp); + double CalcPosterior (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *UtXb, gsl_vector *Utu, + gsl_vector *alpha_prime, gsl_vector *beta, + class HYPBSLMM &cHyp); + void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, + gsl_vector *z_hat, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, + class HYPBSLMM &cHyp); + double CalcREMLE (const gsl_matrix *Utw, const gsl_vector *Uty, + const gsl_vector *K_eval); + + // Calculate the maximum marginal likelihood ratio for each + // analyzed SNPs with gemma, use it to rank SNPs. 
+ double CalcLR (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + vector<pair<size_t, double> > &loglr_sort); + void SampleZ (const gsl_vector *y, const gsl_vector *z_hat, + gsl_vector *z); + double ProposeHnRho (const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat); + double ProposePi (const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, + const size_t &repeat); + double ProposeGamma (const vector<size_t> &rank_old, + vector<size_t> &rank_new, const double *p_gamma, + const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat); + void SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, + vector<size_t> &rank); + + void CalcXtX (const gsl_matrix *X_new, const gsl_vector *y, + const size_t s_size, gsl_matrix *XtX_new, + gsl_vector *Xty_new); + void SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, + const gsl_matrix *XtX_old, const gsl_vector *Xty_old, + const gsl_vector *y, const vector<size_t> &rank_old, + const vector<size_t> &rank_new, gsl_matrix *X_new, + gsl_matrix *XtX_new, gsl_vector *Xty_new); double CalcPosterior (const double yty, class HYPBSLMM &cHyp); - double CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp); + double CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, + const gsl_vector *Xty, const double yty, + const size_t s_size, gsl_vector *Xb, + gsl_vector *beta, class HYPBSLMM &cHyp); void CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, + class HYPBSLMM &cHyp); void MCMC (const gsl_matrix *X, const gsl_vector *y); - - //utility functions -// double vec_sum (gsl_vector *v); -// void vec_center (gsl_vector *v); -// double calc_var (gsl_vector 
*v); -// void calc_sigma (MCMC &cMcmc); -// bool comp_lr (pair<size_t, double> a, pair<size_t, double> b); }; - - #endif diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp index e1b20d8..ebbff70 100644 --- a/src/bslmmdap.cpp +++ b/src/bslmmdap.cpp @@ -49,8 +49,7 @@ using namespace std; -void BSLMMDAP::CopyFromParam (PARAM &cPar) -{ +void BSLMMDAP::CopyFromParam (PARAM &cPar) { file_out=cPar.file_out; path_out=cPar.path_out; @@ -83,9 +82,7 @@ void BSLMMDAP::CopyFromParam (PARAM &cPar) return; } - -void BSLMMDAP::CopyToParam (PARAM &cPar) -{ +void BSLMMDAP::CopyToParam (PARAM &cPar) { cPar.time_UtZ=time_UtZ; cPar.time_Omega=time_Omega; @@ -94,13 +91,16 @@ void BSLMMDAP::CopyToParam (PARAM &cPar) -//read hyp file -void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab) -{ +// Read hyp file. +void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, + vector<double> &vec_sb2, vector<double> &vec_wab) { vec_sa2.clear(); vec_sb2.clear(); vec_wab.clear(); igzstream infile (file_hyp.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open hyp file: "<<file_hyp<<endl; return;} + if (!infile) { + cout<<"error! fail to open hyp file: "<<file_hyp<<endl; + return; + } string line; char *ch_ptr; @@ -127,10 +127,9 @@ void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<doubl return; } - -//read bf file -void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF) -{ +// Read bf file. +void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, + vector<vector<vector<double> > > &BF) { BF.clear(); vec_rs.clear(); igzstream infile (file_bf.c_str(), igzstream::in); @@ -172,7 +171,9 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<v if (t==0) { bf_size=vec_bf.size(); } else { - if (bf_size!=vec_bf.size()) {cout<<"error! unequal row size in bf file."<<endl;} + if (bf_size!=vec_bf.size()) { + cout<<"error! 
unequal row size in bf file."<<endl; + } } if (flag_block==0) { @@ -193,24 +194,28 @@ void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, vector<vector<v } -//read category files -//read both continuous and discrete category file, record mapRS2catc -void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd) -{ +// Read category files. +// Read both continuous and discrete category file, record mapRS2catc. +void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, + gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, + size_t &kc, size_t &kd) { igzstream infile (file_cat.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open category file: "<<file_cat<<endl; return;} + if (!infile) { + cout<<"error! fail to open category file: "<<file_cat<<endl; + return; + } string line; char *ch_ptr; string rs, chr, a1, a0, pos, cm; - //read header + // Read header. HEADER header; !safeGetline(infile, line).eof(); ReadHeader_io (line, header); - //use the header to determine the number of categories + // Use the header to determine the number of categories. kc=header.catc_col.size(); kd=header.catd_col.size(); //set up storage and mapper @@ -219,7 +224,7 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_mat vector<double> catc; vector<int> catd; - //read the following lines to record mapRS2cat + // Read the following lines to record mapRS2cat. while (!safeGetline(infile, line).eof()) { ch_ptr=strtok ((char *)line.c_str(), " , \t"); @@ -255,7 +260,7 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_mat if (mapRS2catd.count(rs)==0 && kd>0) {mapRS2catd[rs]=catd;} } - //load into Ad and Ac + // Load into Ad and Ac. 
if (kc>0) { Ac=gsl_matrix_alloc(vec_rs.size(), kc); for (size_t i=0; i<vec_rs.size(); i++) { @@ -305,15 +310,7 @@ void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_mat return; } - - - - - - - -void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) -{ +void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) { string file_bf, file_hyp; file_bf=path_out+"/"+file_out; file_bf+=".bf.txt"; @@ -325,10 +322,17 @@ void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) outfile_bf.open (file_bf.c_str(), ofstream::out); outfile_hyp.open (file_hyp.c_str(), ofstream::out); - if (!outfile_bf) {cout<<"error writing file: "<<file_bf<<endl; return;} - if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;} + if (!outfile_bf) { + cout<<"error writing file: "<<file_bf<<endl; + return; + } + if (!outfile_hyp) { + cout<<"error writing file: "<<file_hyp<<endl; + return; + } - outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<"weight"<<endl; + outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<< + "weight"<<endl; outfile_hyp<<scientific; for (size_t i=0; i<Hyper->size1; i++) { for (size_t j=0; j<Hyper->size2; j++) { @@ -366,10 +370,9 @@ void BSLMMDAP::WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF) return; } - - -void BSLMMDAP::WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef) -{ +void BSLMMDAP::WriteResult (const vector<string> &vec_rs, + const gsl_matrix *Hyper, const gsl_vector *pip, + const gsl_vector *coef) { string file_gamma, file_hyp, file_coef; file_gamma=path_out+"/"+file_out; file_gamma+=".gamma.txt"; @@ -384,11 +387,21 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hype outfile_hyp.open (file_hyp.c_str(), ofstream::out); outfile_coef.open (file_coef.c_str(), ofstream::out); - if (!outfile_gamma) {cout<<"error writing file: "<<file_gamma<<endl; 
return;} - if (!outfile_hyp) {cout<<"error writing file: "<<file_hyp<<endl; return;} - if (!outfile_coef) {cout<<"error writing file: "<<file_coef<<endl; return;} + if (!outfile_gamma) { + cout<<"error writing file: "<<file_gamma<<endl; + return; + } + if (!outfile_hyp) { + cout<<"error writing file: "<<file_hyp<<endl; + return; + } + if (!outfile_coef) { + cout<<"error writing file: "<<file_coef<<endl; + return; + } - outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<<"weight"<<endl; + outfile_hyp<<"h"<<"\t"<<"rho"<<"\t"<<"sa2"<<"\t"<<"sb2"<<"\t"<< + "weight"<<endl; outfile_hyp<<scientific; for (size_t i=0; i<Hyper->size1; i++) { for (size_t j=0; j<Hyper->size2; j++) { @@ -397,10 +410,10 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hype outfile_hyp<<endl; } - outfile_gamma<<"rs"<<"\t"<<"gamma"<<endl; for (size_t i=0; i<vec_rs.size(); ++i) { - outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<<gsl_vector_get(pip, i)<<endl; + outfile_gamma<<vec_rs[i]<<"\t"<<scientific<<setprecision(6)<< + gsl_vector_get(pip, i)<<endl; } outfile_coef<<"coef"<<endl; @@ -419,25 +432,9 @@ void BSLMMDAP::WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hype } - - -/* -void BSLMMDAP::SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank) -{ - size_t pos; - for (size_t i=0; i<rank.size(); ++i) { - pos=mapRank2pos[rank[i]]; - gsl_vector_view Xgamma_col=gsl_matrix_column (Xgamma, i); - gsl_vector_const_view X_col=gsl_matrix_const_column (X, pos); - gsl_vector_memcpy (&Xgamma_col.vector, &X_col.vector); - } - - return; -} -*/ - -double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_b2, const double tau) -{ +double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, + const gsl_vector *K_eval, + const double sigma_b2, const double tau) { gsl_vector *weight_Hi=gsl_vector_alloc (Uty->size); double logm=0.0; @@ -452,7 +449,7 @@ double BSLMMDAP::CalcMarginal (const 
gsl_vector *Uty, const gsl_vector *K_eval, Hi_yy+=d*uy*uy; } - //calculate likelihood + // Calculate likelihood. logm=-0.5*logdet_H-0.5*tau*Hi_yy+0.5*log(tau)*(double)ni_test; gsl_vector_free (weight_Hi); @@ -460,14 +457,17 @@ double BSLMMDAP::CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, return logm; } - -double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_a2, const double sigma_b2, const double tau) -{ +double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, + const gsl_vector *K_eval, + const double sigma_a2, + const double sigma_b2, const double tau) { clock_t time_start; double logm=0.0; double d, uy, P_yy=0, logdet_O=0.0, logdet_H=0.0; - gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, UtXgamma->size2); + gsl_matrix *UtXgamma_eval=gsl_matrix_alloc (UtXgamma->size1, + UtXgamma->size2); gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); gsl_vector *XtHiy=gsl_vector_alloc (UtXgamma->size2); gsl_vector *beta_hat=gsl_vector_alloc (UtXgamma->size2); @@ -477,7 +477,7 @@ double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty logdet_H=0.0; P_yy=0.0; for (size_t i=0; i<ni_test; ++i) { - gsl_vector_view UtXgamma_row=gsl_matrix_row (UtXgamma_eval, i); + gsl_vector_view UtXgamma_row=gsl_matrix_row(UtXgamma_eval,i); d=gsl_vector_get (K_eval, i)*sigma_b2; d=1.0/(d+1.0); gsl_vector_set (weight_Hi, i, d); @@ -488,7 +488,7 @@ double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty gsl_vector_scale (&UtXgamma_row.vector, d); } - //calculate Omega + // Calculate Omega. gsl_matrix_set_identity (Omega); time_start=clock(); @@ -496,7 +496,7 @@ double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty UtXgamma, 1.0, Omega); time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //calculate beta_hat + // Calculate beta_hat. 
gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); logdet_O=CholeskySolve(Omega, XtHiy, beta_hat); @@ -512,24 +512,26 @@ double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty gsl_vector_free (beta_hat); gsl_vector_free (weight_Hi); - logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)*(double)ni_test; + logm=-0.5*logdet_H-0.5*logdet_O-0.5*tau*P_yy+0.5*log(tau)* + (double)ni_test; return logm; } - double BSLMMDAP::CalcPrior (class HYPBSLMM &cHyp) { double logprior=0; - logprior=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); + logprior=((double)cHyp.n_gamma-1.0)*cHyp.logp+ + ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); return logprior; } - -//where A is the ni_test by n_cat matrix of annotations -void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) { +// Where A is the ni_test by n_cat matrix of annotations. +void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y) { clock_t time_start; - //set up BF + // Set up BF. double tau, h, rho, sigma_a2, sigma_b2, d; size_t ns_causal=10; size_t n_grid=h_ngrid*rho_ngrid; @@ -539,11 +541,13 @@ void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl gsl_matrix *Xgamma=gsl_matrix_alloc(ni_test, 1); gsl_matrix *Hyper=gsl_matrix_alloc(n_grid, 5); - //compute tau by using yty + // Compute tau by using yty. gsl_blas_ddot (Uty, Uty, &tau); tau=(double)ni_test/tau; - //set up grid values for sigma_a2 and sigma_b2 based on an approximately even grid for h and rho, and a fixed number of causals + // Set up grid values for sigma_a2 and sigma_b2 based on an + // approximately even grid for h and rho, and a fixed number + // of causals. 
size_t ij=0; for (size_t i=0; i<h_ngrid; i++) { h=h_min+(h_max-h_min)*(double)i/((double)h_ngrid-1); @@ -566,7 +570,7 @@ void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl } } - //compute BF factors + // Compute BF factors. time_start=clock(); cout<<"Calculating BF..."<<endl; for (size_t t=0; t<ns_test; t++) { @@ -587,21 +591,20 @@ void BSLMMDAP::DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl } time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //save results + // Save results. WriteResult (Hyper, BF); - //free matrices and vectors + // Free matrices and vectors. gsl_matrix_free(BF); gsl_matrix_free(Xgamma); gsl_matrix_free(Hyper); return; } - - - - -void single_ct_regression(const gsl_matrix_int *Xd, const gsl_vector_int *dlevel, const gsl_vector *pip_vec, gsl_vector *coef, gsl_vector *prior_vec) { +void single_ct_regression(const gsl_matrix_int *Xd, + const gsl_vector_int *dlevel, + const gsl_vector *pip_vec, + gsl_vector *coef, gsl_vector *prior_vec) { map<int,double> sum_pip; map<int,double> sum; @@ -623,27 +626,26 @@ void single_ct_regression(const gsl_matrix_int *Xd, const gsl_vector_int *dlevel gsl_vector_set(prior_vec,i,sum_pip[cat]/sum[cat]); } - //double baseline=0; for(int i=0;i<levels;i++){ double new_prior = sum_pip[i]/sum[i]; - //gsl_vector_set(coef, i, log(new_prior/(1-new_prior))-baseline); - //if(i==0){ - //baseline = log(new_prior/(1-new_prior)); - //} gsl_vector_set(coef, i, log(new_prior/(1-new_prior)) ); } return; } - - - -//where A is the ni_test by n_cat matrix of annotations -void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) { +// Where A is the ni_test by n_cat matrix of annotations. 
+void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, + const vector<string> &vec_rs, + const vector<double> &vec_sa2, + const vector<double> &vec_sb2, + const vector<double> &wab, + const vector<vector<vector<double> > > &BF, + gsl_matrix *Ac, gsl_matrix_int *Ad, + gsl_vector_int *dlevel) { clock_t time_start; - //set up BF + // Set up BF. double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save; size_t t1, t2; size_t n_grid=wab.size(), ns_test=vec_rs.size(); @@ -653,10 +655,10 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector gsl_vector *pip=gsl_vector_alloc(ns_test); gsl_vector *coef=gsl_vector_alloc(kc+kd+1); - //perform the EM algorithm + // Perform the EM algorithm. vector<double> vec_wab, vec_wab_new; - //initial values + // Initial values. for (size_t t=0; t<ns_test; t++) { gsl_vector_set (prior_vec, t, (double)BF.size()/(double)ns_test); } @@ -665,11 +667,12 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector vec_wab_new.push_back(wab[ij]); } - //EM iteration + // EM iteration. size_t it=0; double dif=1; while (it<100 && dif>1e-3) { - //update E_gamma + + // Update E_gamma. t1=0, t2=0; for (size_t b=0; b<BF.size(); b++) { s=1; @@ -678,7 +681,7 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector for (size_t ij=0; ij<n_grid; ij++) { d+=vec_wab_new[ij]*BF[b][m][ij]; } - d*=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1)); + d*=gsl_vector_get(prior_vec,t1)/(1-gsl_vector_get(prior_vec,t1)); gsl_vector_set(pip, t1, d); s+=d; @@ -692,7 +695,7 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector } } - //update E_wab + // Update E_wab. 
s=0; for (size_t ij=0; ij<n_grid; ij++) { vec_wab_new[ij]=0; @@ -701,7 +704,8 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector for (size_t b=0; b<BF.size(); b++) { d=1; for (size_t m=0; m<BF[b].size(); m++) { - d+=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij]; + d+=gsl_vector_get(prior_vec, t1)/ + (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij]; t1++; } vec_wab_new[ij]+=log(d); @@ -718,11 +722,12 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector for (size_t ij=0; ij<n_grid; ij++) { vec_wab_new[ij]/=d; - // vec_wab[ij]=vec_wab_new[ij]; } - //update coef, and pi - if(kc==0 && kd==0){//no annotation + // Update coef, and pi. + if(kc==0 && kd==0){ + + // No annotation. s=0; for (size_t t=0; t<pip->size; t++) { s+=gsl_vector_get(pip, t); @@ -733,22 +738,28 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector } gsl_vector_set (coef, 0, log(s/(1-s))); - } else if(kc==0 && kd!=0){//only discrete annotations + } else if(kc==0 && kd!=0){ + + // Only discrete annotations. if(kd == 1){ single_ct_regression(Ad, dlevel, pip, coef, prior_vec); }else{ logistic_cat_fit(coef, Ad, dlevel, pip, 0, 0); logistic_cat_pred(coef, Ad, dlevel, prior_vec); } - } else if (kc!=0 && kd==0) {//only continuous annotations + } else if (kc!=0 && kd==0) { + + // Only continuous annotations. logistic_cont_fit(coef, Ac, pip, 0, 0); logistic_cont_pred(coef, Ac, prior_vec); - } else if (kc!=0 && kd!=0) {//both continuous and categorical annotations + } else if (kc!=0 && kd!=0) { + + // Both continuous and categorical annotations. logistic_mixed_fit(coef, Ad, dlevel, Ac, pip, 0, 0); logistic_mixed_pred(coef, Ad, dlevel, Ac, prior_vec); } - //compute marginal likelihood + // Compute marginal likelihood. 
logm=0; t1=0; @@ -757,7 +768,8 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector for (size_t m=0; m<BF[b].size(); m++) { s+=log(1-gsl_vector_get(prior_vec, t1)); for (size_t ij=0; ij<n_grid; ij++) { - d+=gsl_vector_get(prior_vec, t1)/(1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij]; + d+=gsl_vector_get(prior_vec, t1)/ + (1-gsl_vector_get(prior_vec, t1))*vec_wab[ij]*BF[b][m][ij]; } } logm+=log(d)+s; @@ -773,14 +785,17 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector cout<<"iteration = "<<it<<"; marginal likelihood = "<<logm<<endl; } - //update h and rho that correspond to w_ab + // Update h and rho that correspond to w_ab. for (size_t ij=0; ij<n_grid; ij++) { sigma_a2=vec_sa2[ij]; sigma_b2=vec_sb2[ij]; - d=exp(gsl_vector_get(coef, coef->size-1))/(1+exp(gsl_vector_get(coef, coef->size-1))); - h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/(1+d*(double)ns_test*sigma_a2+1*sigma_b2); - rho=d*(double)ns_test*sigma_a2/(d*(double)ns_test*sigma_a2+1*sigma_b2); + d=exp(gsl_vector_get(coef, coef->size-1))/ + (1+exp(gsl_vector_get(coef, coef->size-1))); + h=(d*(double)ns_test*sigma_a2+1*sigma_b2)/ + (1+d*(double)ns_test*sigma_a2+1*sigma_b2); + rho=d*(double)ns_test*sigma_a2/ + (d*(double)ns_test*sigma_a2+1*sigma_b2); gsl_matrix_set (Hyper, ij, 0, h); gsl_matrix_set (Hyper, ij, 1, rho); @@ -789,13 +804,12 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector gsl_matrix_set (Hyper, ij, 4, vec_wab_new[ij]); } - //obtain beta and alpha parameters - + // Obtain beta and alpha parameters. - //save results + // Save results. WriteResult (vec_rs, Hyper, pip, coef); - //free matrices and vectors + // Free matrices and vectors. 
gsl_vector_free(prior_vec); gsl_matrix_free(Hyper); gsl_vector_free(pip); diff --git a/src/bslmmdap.h b/src/bslmmdap.h index 7d95db7..8445669 100644 --- a/src/bslmmdap.h +++ b/src/bslmmdap.h @@ -50,39 +50,65 @@ public: HYPBSLMM cHyp_initial; // Summary statistics - size_t ni_total, ns_total; //number of total individuals and snps - size_t ni_test, ns_test; //number of individuals and snps used for analysis + size_t ni_total, ns_total; // Number of total individuals and SNPs. + size_t ni_test, ns_test; // Number of individuals and SNPs + // used for analysis. double h_min, h_max, rho_min, rho_max; size_t h_ngrid, rho_ngrid; double time_UtZ; - double time_Omega; //time spent on optimization iterations - double time_Proposal; //time spent on constructing the proposal distribution for gamma (i.e. lmm or lm analysis) - vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis - vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis + double time_Omega; // Time spent on optimization iterations. + double time_Proposal; // Time spent on constructing the + // proposal distribution for gamma + // (i.e., lmm or lm analysis). - vector<SNPINFO> snpInfo; //record SNP information + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; - // Main Functions + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + vector<SNPINFO> snpInfo; // Record SNP information. + + // Main functions. 
void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); void WriteResult (const gsl_matrix *Hyper, const gsl_matrix *BF); - void WriteResult (const vector<string> &vec_rs, const gsl_matrix *Hyper, const gsl_vector *pip, const gsl_vector *coef); - double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_b2, const double tau); - double CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, const double sigma_a2, const double sigma_b2, const double tau); + void WriteResult (const vector<string> &vec_rs, + const gsl_matrix *Hyper, const gsl_vector *pip, + const gsl_vector *coef); + double CalcMarginal (const gsl_vector *Uty, const gsl_vector *K_eval, + const double sigma_b2, const double tau); + double CalcMarginal (const gsl_matrix *UtXgamma, + const gsl_vector *Uty, const gsl_vector *K_eval, + const double sigma_a2, const double sigma_b2, + const double tau); double CalcPrior (class HYPBSLMM &cHyp); - void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y); - void DAP_EstimateHyper (const size_t kc, const size_t kd, const vector<string> &vec_rs, const vector<double> &vec_sa2, const vector<double> &vec_sb2, const vector<double> &wab, const vector<vector<vector<double> > > &BF, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel); - + void DAP_CalcBF (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const gsl_vector *y); + void DAP_EstimateHyper (const size_t kc, const size_t kd, + const vector<string> &vec_rs, + const vector<double> &vec_sa2, + const vector<double> &vec_sb2, + const vector<double> &wab, + const vector<vector<vector<double> > > &BF, + gsl_matrix *Ac, gsl_matrix_int *Ad, + gsl_vector_int *dlevel); }; -void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, vector<double> &vec_sb2, vector<double> &vec_wab); -void ReadFile_bf (const string 
&file_bf, vector<string> &vec_rs, vector<vector<vector<double> > > &BF); -void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, size_t &kc, size_t &kd); - +void ReadFile_hyb (const string &file_hyp, vector<double> &vec_sa2, + vector<double> &vec_sb2, vector<double> &vec_wab); +void ReadFile_bf (const string &file_bf, vector<string> &vec_rs, + vector<vector<vector<double> > > &BF); +void ReadFile_cat (const string &file_cat, const vector<string> &vec_rs, + gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel, + size_t &kc, size_t &kd); #endif diff --git a/src/gemma.h b/src/gemma.h index acb1309..8393470 100644 --- a/src/gemma.h +++ b/src/gemma.h @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,32 +13,28 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __GEMMA_H__ #define __GEMMA_H__ -#ifdef FORCE_FLOAT -#include "param_float.h" -#else #include "param.h" -#endif using namespace std; class GEMMA { public: - //parameters + // Parameters. string version; string date; string year; - //constructor + // Constructor. GEMMA(void); - //functions + // Functions. 
void PrintHeader (void); void PrintHelp (size_t option); void PrintLicense (void); @@ -47,6 +43,5 @@ public: void WriteLog (int argc, char **argv, PARAM &cPar); }; - #endif @@ -1,6 +1,6 @@ -/* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou +/* + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __LMM_H__ @@ -21,22 +21,12 @@ #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" - - -#ifdef FORCE_FLOAT -#include "param_float.h" -#include "io_float.h" -#else #include "param.h" #include "io.h" -#endif using namespace std; - - -class FUNC_PARAM -{ +class FUNC_PARAM { public: bool calc_null; @@ -48,15 +38,12 @@ public: size_t e_mode; }; - - - class LMM { public: - // IO related parameters - int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests - size_t d_pace; //display pace + // IO-related parameters + int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests. + size_t d_pace; // Display pace. string file_bfile; string file_geno; @@ -75,42 +62,76 @@ public: double logl_mle_H0; // Summary statistics - size_t ni_total, ni_test; //number of individuals - size_t ns_total, ns_test; //number of snps - size_t ng_total, ng_test; //number of genes + size_t ni_total, ni_test; // Number of individuals. + size_t ns_total, ns_test; // Number of SNPs. + size_t ng_total, ng_test; // Number of genes. 
size_t n_cvt; - double time_UtX; //time spent on optimization iterations - double time_opt; //time spent on optimization iterations + double time_UtX; // Time spent on optimization iterations. + double time_opt; // Time spent on optimization iterations. - vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis - vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; - vector<SNPINFO> snpInfo; //record SNP information + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; - // Not included in PARAM - vector<SUMSTAT> sumStat; //Output SNPSummary Data + vector<SNPINFO> snpInfo; // Record SNP information. - // Main functions + // Not included in PARAM. + vector<SUMSTAT> sumStat; // Output SNPSummary Data. + + // Main functions. 
void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); - void AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Utx, const gsl_matrix *W, const gsl_vector *x); - void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y); - // WJA added - void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y); - void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y); - void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env); - void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const gsl_matrix *W, const gsl_vector *y, const gsl_vector *env); + void AnalyzeGene (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Utx, + const gsl_matrix *W, const gsl_vector *x); + void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y); + // WJA added. 
+ void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y); + void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y); + void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env); + void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const gsl_matrix *W, const gsl_vector *y, + const gsl_vector *env); void WriteFiles (); - void CalcRLWald (const double &lambda, const FUNC_PARAM &params, double &beta, double &se, double &p_wald); - void CalcRLScore (const double &l, const FUNC_PARAM &params, double &beta, double &se, double &p_score); + void CalcRLWald (const double &lambda, const FUNC_PARAM &params, + double &beta, double &se, double &p_wald); + void CalcRLScore (const double &l, const FUNC_PARAM &params, + double &beta, double &se, double &p_score); }; -void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const double l_min, const double l_max, const size_t n_region, vector<pair<size_t, double> > &pos_loglr); -void CalcLambda (const char func_name, FUNC_PARAM &params, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logf); -void CalcLambda (const char func_name, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double l_min, const double l_max, const size_t n_region, double &lambda, double &logl_H0); -void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, const double trace_G, double &pve, double &pve_se); -void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, const double lambda, double &vg, 
double &ve, gsl_vector *beta, gsl_vector *se_beta); +void MatrixCalcLR (const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + const double l_min, const double l_max, + const size_t n_region, + vector<pair<size_t, double> > &pos_loglr); +void CalcLambda (const char func_name, FUNC_PARAM &params, + const double l_min, const double l_max, + const size_t n_region, double &lambda, double &logf); +void CalcLambda (const char func_name, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_vector *Uty, + const double l_min, const double l_max, + const size_t n_region, double &lambda, double &logl_H0); +void CalcPve (const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, + const double trace_G, double &pve, double &pve_se); +void CalcLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_vector *Uty, const double lambda, double &vg, + double &ve, gsl_vector *beta, gsl_vector *se_beta); #endif diff --git a/src/logistic.cpp b/src/logistic.cpp index 002ce98..3f9d6ff 100644 --- a/src/logistic.cpp +++ b/src/logistic.cpp @@ -13,7 +13,7 @@ typedef struct{ gsl_matrix_int *X;
gsl_vector_int *nlev;
gsl_vector *y;
- gsl_matrix *Xc; // continuous covariates matrix Nobs x Kc (NULL if not used)
+ gsl_matrix *Xc; // Continuous covariates matrix Nobs x Kc (NULL if not used).
double lambdaL1;
double lambdaL2;
} fix_parm_mixed_T;
@@ -29,8 +29,9 @@ double fLogit_mixed(gsl_vector *beta, int npar = beta->size;
double total = 0;
double aux = 0;
+
// Changed loop start at 1 instead of 0 to avoid regularization of
- // beta_0*\/ */
+ // beta_0*\/
// #pragma omp parallel for reduction (+:total)
for(int i = 1; i < npar; ++i)
total += beta->data[i]*beta->data[i];
@@ -57,8 +58,9 @@ double fLogit_mixed(gsl_vector *beta, }
-void logistic_mixed_pred(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1)
- ,gsl_matrix_int *X //Matrix Nobs x K
+void logistic_mixed_pred(gsl_vector *beta // Vector of parameters
+ // length = 1 + Sum_k(C_k -1)
+ ,gsl_matrix_int *X //Matrix Nobs x K
,gsl_vector_int *nlev // Vector with number categories
,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc (NULL if not used)
,gsl_vector *yhat //Vector of prob. predicted by the logistic
@@ -287,39 +289,40 @@ int logistic_mixed_fit(gsl_vector *beta /* Categorical */
/***************/
-// I need to bundle all the data that goes to the function to optimze together.
+// I need to bundle all the data that goes to the function to optimze
+// together.
typedef struct{
gsl_matrix_int *X;
gsl_vector_int *nlev;
gsl_vector *y;
double lambdaL1;
double lambdaL2;
-}fix_parm_cat_T;
-
-
-double fLogit_cat(gsl_vector *beta
- ,gsl_matrix_int *X
- ,gsl_vector_int *nlev
- ,gsl_vector *y
- ,double lambdaL1
- ,double lambdaL2)
-{
+} fix_parm_cat_T;
+
+double fLogit_cat(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2) {
int n = y->size;
- // int k = X->size2;
int npar = beta->size;
double total = 0;
double aux = 0;
- /* omp_set_num_threads(ompthr); */
- /* /\* Changed loop start at 1 instead of 0 to avoid regularization of beta 0*\/ */
- /* /\*#pragma omp parallel for reduction (+:total)*\/ */
+ // omp_set_num_threads(ompthr); /\* Changed loop start at 1 instead
+ // of 0 to avoid regularization of beta 0*\/ /\*#pragma omp parallel
+ // for reduction (+:total)*\/
for(int i = 1; i < npar; ++i)
total += beta->data[i]*beta->data[i];
total = (-total*lambdaL2/2);
- /* /\*#pragma omp parallel for reduction (+:aux)*\/ */
+
+ // /\*#pragma omp parallel for reduction (+:aux)*\/
for(int i = 1; i < npar; ++i)
aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
total = total-aux*lambdaL1;
- /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+
+ // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
+ // #reduction (+:total)
for(int i = 0; i < n; ++i) {
double Xbetai=beta->data[0];
int iParm=1;
@@ -333,7 +336,6 @@ double fLogit_cat(gsl_vector *beta return -total;
}
-
void logistic_cat_pred(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1)
,gsl_matrix_int *X //Matrix Nobs x K
,gsl_vector_int *nlev // Vector with number categories
diff --git a/src/mvlmm.h b/src/mvlmm.h index 9ff567c..d495c26 100644 --- a/src/mvlmm.h +++ b/src/mvlmm.h @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,36 +13,25 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ #ifndef __MVLMM_H__ #define __MVLMM_H__ #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" - - -#ifdef FORCE_FLOAT -#include "param_float.h" -#include "io_float.h" -#else #include "param.h" #include "io.h" -#endif using namespace std; - - - - class MVLMM { public: - // IO related parameters - int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests - size_t d_pace; //display pace + // IO-related parameters. + int a_mode; // Analysis mode: 1/2/3/4 for Frequentist tests. + size_t d_pace; // Display pace. string file_bfile; string file_geno; @@ -50,48 +39,68 @@ public: string file_out; string path_out; - // MVLMM related parameters + // MVLMM-related parameters. 
double l_min; double l_max; size_t n_region; double logl_remle_H0, logl_mle_H0; vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; - vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null; - vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null; + vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null; + vector<double> VVe_mle_null; + vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null; + vector<double> se_beta_mle_null; double p_nr; size_t em_iter, nr_iter; double em_prec, nr_prec; size_t crt; - // Summary statistics - size_t ni_total, ni_test; //number of individuals - size_t ns_total, ns_test; //number of snps + // Summary statistics. + size_t ni_total, ni_test; // Number of individuals. + size_t ns_total, ns_test; // Number of SNPs. size_t n_cvt; size_t n_ph; - double time_UtX; //time spent on optimization iterations - double time_opt; //time spent on optimization iterations + double time_UtX; // Time spent on optimization iterations. + double time_opt; // Time spent on optimization iterations. + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; - vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis - vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; - vector<SNPINFO> snpInfo; //record SNP information + vector<SNPINFO> snpInfo; // Record SNP information. - // Not included in PARAM - vector<MPHSUMSTAT> sumStat; //Output SNPSummary Data + // Not included in PARAM. + vector<MPHSUMSTAT> sumStat; // Output SNPSummary Data. 
// Main functions void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); - void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY); - void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY); - void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY); - void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env); - void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const gsl_vector *env); + void AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY); + void AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY); + void Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY); + void AnalyzeBimbamGXE (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY, + const gsl_vector *env); + void AnalyzePlinkGXE (const gsl_matrix *U, const gsl_vector *eval, + const gsl_matrix *UtW, const gsl_matrix *UtY, + const gsl_vector *env); void WriteFiles (); }; -void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, const gsl_matrix *UtY, const size_t em_iter, const size_t nr_iter, const double em_prec, const double nr_prec, const double l_min, const double l_max, const size_t n_region, gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, gsl_matrix *se_B); +void CalcMvLmmVgVeBeta (const gsl_vector *eval, const gsl_matrix *UtW, + const gsl_matrix *UtY, const size_t em_iter, + const size_t nr_iter, const double em_prec, + const double nr_prec, const double l_min, + const double l_max, const size_t n_region, + gsl_matrix *V_g, gsl_matrix *V_e, gsl_matrix *B, + gsl_matrix *se_B); 
#endif |