From 3935ba39d30666dd7d4a831155631847c77b70c4 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Wed, 2 Aug 2017 08:46:58 +0000 Subject: Massive patch using LLVM coding style. It was generated with: clang-format -style=LLVM -i *.cpp *.h Please set your editor to replace tabs with spaces and use indentation of 2 spaces. --- src/bslmm.cpp | 3331 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 1689 insertions(+), 1642 deletions(-) (limited to 'src/bslmm.cpp') diff --git a/src/bslmm.cpp b/src/bslmm.cpp index d579802..3305639 100644 --- a/src/bslmm.cpp +++ b/src/bslmm.cpp @@ -16,1360 +16,1428 @@ along with this program. If not, see . */ -#include #include +#include #include -#include +#include #include +#include +#include +#include #include #include #include -#include -#include -#include -#include "gsl/gsl_vector.h" -#include "gsl/gsl_matrix.h" -#include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" #include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" #include "gsl/gsl_randist.h" -#include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" +#include "gsl/gsl_vector.h" -#include "lapack.h" -#include "param.h" #include "bslmm.h" -#include "lmm.h" +#include "lapack.h" #include "lm.h" +#include "lmm.h" #include "mathfunc.h" +#include "param.h" using namespace std; -void BSLMM::CopyFromParam (PARAM &cPar) { - a_mode=cPar.a_mode; - d_pace=cPar.d_pace; - - file_bfile=cPar.file_bfile; - file_geno=cPar.file_geno; - file_out=cPar.file_out; - path_out=cPar.path_out; - - l_min=cPar.h_min; - l_max=cPar.h_max; - n_region=cPar.n_region; - pve_null=cPar.pve_null; - pheno_mean=cPar.pheno_mean; - - time_UtZ=0.0; - time_Omega=0.0; - n_accept=0; - - h_min=cPar.h_min; - h_max=cPar.h_max; - h_scale=cPar.h_scale; - rho_min=cPar.rho_min; - rho_max=cPar.rho_max; - rho_scale=cPar.rho_scale; - logp_min=cPar.logp_min; - logp_max=cPar.logp_max; - logp_scale=cPar.logp_scale; - - s_min=cPar.s_min; - s_max=cPar.s_max; - w_step=cPar.w_step; - s_step=cPar.s_step; - r_pace=cPar.r_pace; - w_pace=cPar.w_pace; - n_mh=cPar.n_mh; - geo_mean=cPar.geo_mean; - randseed=cPar.randseed; - trace_G=cPar.trace_G; - - ni_total=cPar.ni_total; - ns_total=cPar.ns_total; - ni_test=cPar.ni_test; - ns_test=cPar.ns_test; - n_cvt=cPar.n_cvt; - - indicator_idv=cPar.indicator_idv; - indicator_snp=cPar.indicator_snp; - snpInfo=cPar.snpInfo; - - return; +void BSLMM::CopyFromParam(PARAM &cPar) { + a_mode = cPar.a_mode; + d_pace = cPar.d_pace; + + file_bfile = cPar.file_bfile; + file_geno = cPar.file_geno; + file_out = cPar.file_out; + path_out = cPar.path_out; + + l_min = cPar.h_min; + l_max = cPar.h_max; + n_region = cPar.n_region; + pve_null = cPar.pve_null; + pheno_mean = cPar.pheno_mean; + + time_UtZ = 0.0; + time_Omega = 0.0; + n_accept = 0; + + h_min = cPar.h_min; + h_max = cPar.h_max; + h_scale = cPar.h_scale; + rho_min = cPar.rho_min; + rho_max = cPar.rho_max; + rho_scale = cPar.rho_scale; + logp_min = cPar.logp_min; + logp_max = cPar.logp_max; + logp_scale = cPar.logp_scale; + + s_min = cPar.s_min; + s_max = cPar.s_max; + w_step = cPar.w_step; + s_step = cPar.s_step; + r_pace = cPar.r_pace; + w_pace = cPar.w_pace; + n_mh = cPar.n_mh; + geo_mean = cPar.geo_mean; + randseed = cPar.randseed; + trace_G = cPar.trace_G; + + ni_total = cPar.ni_total; + ns_total = cPar.ns_total; + ni_test = cPar.ni_test; + ns_test = cPar.ns_test; + n_cvt = cPar.n_cvt; + + indicator_idv = cPar.indicator_idv; + indicator_snp = cPar.indicator_snp; + snpInfo = cPar.snpInfo; + + return; } -void BSLMM::CopyToParam (PARAM &cPar) { - cPar.time_UtZ=time_UtZ; - cPar.time_Omega=time_Omega; - cPar.time_Proposal=time_Proposal; - cPar.cHyp_initial=cHyp_initial; - cPar.n_accept=n_accept; - cPar.pheno_mean=pheno_mean; - cPar.randseed=randseed; +void BSLMM::CopyToParam(PARAM &cPar) { + cPar.time_UtZ = time_UtZ; + cPar.time_Omega = time_Omega; + cPar.time_Proposal = time_Proposal; + cPar.cHyp_initial = cHyp_initial; + cPar.n_accept = n_accept; + cPar.pheno_mean = pheno_mean; + cPar.randseed = randseed; - return; + return; } -void BSLMM::WriteBV (const gsl_vector *bv) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".bv.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "< > &beta_g, - const gsl_vector *alpha, const size_t w) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".param.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "<> &beta_g, + const gsl_vector *alpha, const size_t w) { + string file_str; + file_str = path_out + "/" + file_out; + file_str += ".param.txt"; + + ofstream outfile(file_str.c_str(), ofstream::out); + if (!outfile) { + cout << "error writing file: " << file_str.c_str() << endl; + return; + } + + outfile << "chr" + << "\t" + << "rs" + << "\t" + << "ps" + << "\t" + << "n_miss" + << "\t" + << "alpha" + << "\t" + << "beta" + << "\t" + << "gamma" << endl; + + size_t t = 0; + for (size_t i = 0; i < ns_total; ++i) { + if (indicator_snp[i] == 0) { + continue; + } + + outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" + << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"; + + outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t) + << "\t"; + if (beta_g[t].second != 0) { + outfile << beta_g[t].first / beta_g[t].second << "\t" + << beta_g[t].second / (double)w << endl; + } else { + outfile << 0.0 << "\t" << 0.0 << endl; + } + t++; + } + + outfile.clear(); + outfile.close(); + return; } -void BSLMM::WriteParam (const gsl_vector *alpha) { - string file_str; - file_str=path_out+"/"+file_out; - file_str+=".param.txt"; - - ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) { - cout<<"error writing file: "< &rank) { - size_t pos; - for (size_t i=0; i &rank) { + size_t pos; + for (size_t i = 0; i < rank.size(); ++i) { + pos = mapRank2pos[rank[i]]; + gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i); + gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos); + gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector); + } + + return; } -double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, - const double sigma_a2) { - double pve, var_y; +double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const double sigma_a2) { + double pve, var_y; - gsl_matrix *Omega=gsl_matrix_alloc (UtXgamma->size2, UtXgamma->size2); - gsl_vector *Xty=gsl_vector_alloc (UtXgamma->size2); - gsl_vector *OiXty=gsl_vector_alloc (UtXgamma->size2); + gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); + gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2); - gsl_matrix_set_identity (Omega); - gsl_matrix_scale (Omega, 1.0/sigma_a2); + gsl_matrix_set_identity(Omega); + gsl_matrix_scale(Omega, 1.0 / sigma_a2); - lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, - 1.0, Omega); - gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); + lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega); + gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); - CholeskySolve(Omega, Xty, OiXty); + CholeskySolve(Omega, Xty, OiXty); - gsl_blas_ddot (Xty, OiXty, &pve); - gsl_blas_ddot (Uty, Uty, &var_y); + gsl_blas_ddot(Xty, OiXty, &pve); + gsl_blas_ddot(Uty, Uty, &var_y); - pve/=var_y; + pve /= var_y; - gsl_matrix_free (Omega); - gsl_vector_free (Xty); - gsl_vector_free (OiXty); + gsl_matrix_free(Omega); + gsl_vector_free(Xty); + gsl_vector_free(OiXty); - return pve; + return pve; } -void BSLMM::InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, - vector &rank, class HYPBSLMM &cHyp, - vector > &pos_loglr) { - double q_genome=gsl_cdf_chisq_Qinv(0.05/(double)ns_test, 1); - - cHyp.n_gamma=0; - for (size_t i=0; iq_genome) {cHyp.n_gamma++;} - } - if (cHyp.n_gamma<10) {cHyp.n_gamma=10;} - - if (cHyp.n_gamma>s_max) {cHyp.n_gamma=s_max;} - if (cHyp.n_gamma1.0) {cHyp.rho=1.0;} - - if (cHyp.hh_max) {cHyp.h=h_max;} - if (cHyp.rhorho_max) {cHyp.rho=rho_max;} - if (cHyp.logplogp_max) {cHyp.logp=logp_max;} - - cout<<"initial value of h = "<size; i++) - { - d=gsl_ran_gaussian(gsl_r, 1); - gsl_vector_set(beta, i, d); - } - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); - - // This computes inv(L^T(Omega)) %*% beta. - gsl_vector_scale(beta, sqrt(sigma_a2/tau)); - gsl_vector_add(beta, beta_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); - - // Sample alpha. - gsl_vector_memcpy (alpha_prime, Uty); - gsl_vector_sub (alpha_prime, UtXb); - gsl_vector_mul (alpha_prime, weight_Hi); - gsl_vector_scale (alpha_prime, sigma_b2); - - // Sample u. - gsl_vector_memcpy (Utu, alpha_prime); - gsl_vector_mul (Utu, K_eval); - - if (a_mode==11) {gsl_vector_scale (Utu_rand, sqrt(1.0/tau));} - gsl_vector_add (Utu, Utu_rand); - - // For quantitative traits, calculate pve and pge. - if (a_mode==11) { - gsl_blas_ddot (UtXb, UtXb, &d); - cHyp.pge=d/(double)ni_test; - - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=cHyp.pge+d/(double)ni_test; - - if (cHyp.pve==0) {cHyp.pge=0.0;} - else {cHyp.pge/=cHyp.pve;} - cHyp.pve/=cHyp.pve+1.0/tau; - } - - gsl_matrix_free (UtXgamma_eval); - gsl_matrix_free (Omega); - gsl_vector_free (XtHiy); - gsl_vector_free (beta_hat); - gsl_vector_free (Utu_rand); - gsl_vector_free (weight_Hi); - - logpost=-0.5*logdet_H-0.5*logdet_O; - if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} - else {logpost-=0.5*P_yy;} - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+ - ((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); - - return logpost; +double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const gsl_vector *K_eval, gsl_vector *UtXb, + gsl_vector *Utu, gsl_vector *alpha_prime, + gsl_vector *beta, class HYPBSLMM &cHyp) { + clock_t time_start; + + double sigma_a2 = cHyp.h * cHyp.rho / + (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); + double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h)); + + double logpost = 0.0; + double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0; + + gsl_matrix *UtXgamma_eval = + gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2); + gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); + gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2); + gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1); + gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1); + + gsl_matrix_memcpy(UtXgamma_eval, UtXgamma); + + logdet_H = 0.0; + P_yy = 0.0; + for (size_t i = 0; i < ni_test; ++i) { + gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i); + d = gsl_vector_get(K_eval, i) * sigma_b2; + ds = d / (d + 1.0); + d = 1.0 / (d + 1.0); + gsl_vector_set(weight_Hi, i, d); + + logdet_H -= log(d); + uy = gsl_vector_get(Uty, i); + P_yy += d * uy * uy; + gsl_vector_scale(&UtXgamma_row.vector, d); + + gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds)); + } + + // Calculate Omega. + gsl_matrix_set_identity(Omega); + + time_start = clock(); + lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, + Omega); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // Calculate beta_hat. + gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); + + logdet_O = CholeskySolve(Omega, XtHiy, beta_hat); + + gsl_vector_scale(beta_hat, sigma_a2); + + gsl_blas_ddot(XtHiy, beta_hat, &d); + P_yy -= d; + + // Sample tau. + double tau = 1.0; + if (a_mode == 11) { + tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy); + } + + // Sample beta. + for (size_t i = 0; i < beta->size; i++) { + d = gsl_ran_gaussian(gsl_r, 1); + gsl_vector_set(beta, i, d); + } + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); + + // This computes inv(L^T(Omega)) %*% beta. + gsl_vector_scale(beta, sqrt(sigma_a2 / tau)); + gsl_vector_add(beta, beta_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); + + // Sample alpha. + gsl_vector_memcpy(alpha_prime, Uty); + gsl_vector_sub(alpha_prime, UtXb); + gsl_vector_mul(alpha_prime, weight_Hi); + gsl_vector_scale(alpha_prime, sigma_b2); + + // Sample u. + gsl_vector_memcpy(Utu, alpha_prime); + gsl_vector_mul(Utu, K_eval); + + if (a_mode == 11) { + gsl_vector_scale(Utu_rand, sqrt(1.0 / tau)); + } + gsl_vector_add(Utu, Utu_rand); + + // For quantitative traits, calculate pve and pge. + if (a_mode == 11) { + gsl_blas_ddot(UtXb, UtXb, &d); + cHyp.pge = d / (double)ni_test; + + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = cHyp.pge + d / (double)ni_test; + + if (cHyp.pve == 0) { + cHyp.pge = 0.0; + } else { + cHyp.pge /= cHyp.pve; + } + cHyp.pve /= cHyp.pve + 1.0 / tau; + } + + gsl_matrix_free(UtXgamma_eval); + gsl_matrix_free(Omega); + gsl_vector_free(XtHiy); + gsl_vector_free(beta_hat); + gsl_vector_free(Utu_rand); + gsl_vector_free(weight_Hi); + + logpost = -0.5 * logdet_H - 0.5 * logdet_O; + if (a_mode == 11) { + logpost -= 0.5 * (double)ni_test * log(P_yy); + } else { + logpost -= 0.5 * P_yy; + } + logpost += + ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); + + return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, - gsl_vector *z_hat, class HYPBSLMM &cHyp) { - double d; +void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu, + gsl_vector *z_hat, class HYPBSLMM &cHyp) { + double d; - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=d/(double)ni_test; + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = d / (double)ni_test; - gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, z_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat); - cHyp.pve/=cHyp.pve+1.0; - cHyp.pge=0.0; + cHyp.pve /= cHyp.pve + 1.0; + cHyp.pge = 0.0; - return; + return; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, - const gsl_vector *Utu, gsl_vector *z_hat, - class HYPBSLMM &cHyp) { - double d; - gsl_vector *UtXbU=gsl_vector_alloc (Utu->size); +void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, + class HYPBSLMM &cHyp) { + double d; + gsl_vector *UtXbU = gsl_vector_alloc(Utu->size); - gsl_blas_ddot (UtXb, UtXb, &d); - cHyp.pge=d/(double)ni_test; + gsl_blas_ddot(UtXb, UtXb, &d); + cHyp.pge = d / (double)ni_test; - gsl_blas_ddot (Utu, Utu, &d); - cHyp.pve=cHyp.pge+d/(double)ni_test; + gsl_blas_ddot(Utu, Utu, &d); + cHyp.pve = cHyp.pge + d / (double)ni_test; - gsl_vector_memcpy (UtXbU, Utu); - gsl_vector_add (UtXbU, UtXb); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); + gsl_vector_memcpy(UtXbU, Utu); + gsl_vector_add(UtXbU, UtXb); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); - if (cHyp.pve==0) {cHyp.pge=0.0;} - else {cHyp.pge/=cHyp.pve;} + if (cHyp.pve == 0) { + cHyp.pge = 0.0; + } else { + cHyp.pge /= cHyp.pve; + } - cHyp.pve/=cHyp.pve+1.0; + cHyp.pve /= cHyp.pve + 1.0; - gsl_vector_free(UtXbU); - return; + gsl_vector_free(UtXbU); + return; } -void BSLMM::SampleZ (const gsl_vector *y, const gsl_vector *z_hat, - gsl_vector *z) { - double d1, d2, z_rand=0.0; - for (size_t i=0; isize; ++i) { - d1=gsl_vector_get (y, i); - d2=gsl_vector_get (z_hat, i); - - // y is centered for case control studies. - if (d1<=0.0) { - - // Control, right truncated. - do { - z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); - } while (z_rand>0.0); - } - else { - do { - z_rand=d2+gsl_ran_gaussian(gsl_r, 1.0); - } while (z_rand<0.0); - } - - gsl_vector_set (z, i, z_rand); - } - - return; +void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat, + gsl_vector *z) { + double d1, d2, z_rand = 0.0; + for (size_t i = 0; i < z->size; ++i) { + d1 = gsl_vector_get(y, i); + d2 = gsl_vector_get(z_hat, i); + + // y is centered for case control studies. + if (d1 <= 0.0) { + + // Control, right truncated. + do { + z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0); + } while (z_rand > 0.0); + } else { + do { + z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0); + } while (z_rand < 0.0); + } + + gsl_vector_set(z, i, z_rand); + } + + return; } -double BSLMM::ProposeHnRho (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat) { +double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { - double h=cHyp_old.h, rho=cHyp_old.rho; + double h = cHyp_old.h, rho = cHyp_old.rho; - double d_h=(h_max-h_min)*h_scale, d_rho=(rho_max-rho_min)*rho_scale; + double d_h = (h_max - h_min) * h_scale, + d_rho = (rho_max - rho_min) * rho_scale; - for (size_t i=0; ih_max) {h=2*h_max-h;} + for (size_t i = 0; i < repeat; ++i) { + h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h; + if (h < h_min) { + h = 2 * h_min - h; + } + if (h > h_max) { + h = 2 * h_max - h; + } - rho=rho+(gsl_rng_uniform(gsl_r)-0.5)*d_rho; - if (rhorho_max) {rho=2*rho_max-rho;} - } - cHyp_new.h=h; - cHyp_new.rho=rho; - return 0.0; + rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho; + if (rho < rho_min) { + rho = 2 * rho_min - rho; + } + if (rho > rho_max) { + rho = 2 * rho_max - rho; + } + } + cHyp_new.h = h; + cHyp_new.rho = rho; + return 0.0; } -double BSLMM::ProposePi (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat) { - double logp_old=cHyp_old.logp, logp_new=cHyp_old.logp; - double log_ratio=0.0; +double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { + double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp; + double log_ratio = 0.0; - double d_logp=min(0.1, (logp_max-logp_min)*logp_scale); + double d_logp = min(0.1, (logp_max - logp_min) * logp_scale); - for (size_t i=0; ilogp_max) {logp_new=2*logp_max-logp_new;} - log_ratio+=logp_new-logp_old; - logp_old=logp_new; - } - cHyp_new.logp=logp_new; + for (size_t i = 0; i < repeat; ++i) { + logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp; + if (logp_new < logp_min) { + logp_new = 2 * logp_min - logp_new; + } + if (logp_new > logp_max) { + logp_new = 2 * logp_max - logp_new; + } + log_ratio += logp_new - logp_old; + logp_old = logp_new; + } + cHyp_new.logp = logp_new; - return log_ratio; + return log_ratio; } -bool comp_vec (size_t a, size_t b) { - return (a < b); -} +bool comp_vec(size_t a, size_t b) { return (a < b); } + +double BSLMM::ProposeGamma(const vector &rank_old, + vector &rank_new, const double *p_gamma, + const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat) { + map mapRank2in; + size_t r; + double unif, logp = 0.0; + int flag_gamma; + size_t r_add, r_remove, col_id; + + rank_new.clear(); + if (cHyp_old.n_gamma != rank_old.size()) { + cout << "size wrong" << endl; + } + + if (cHyp_old.n_gamma != 0) { + for (size_t i = 0; i < rank_old.size(); ++i) { + r = rank_old[i]; + rank_new.push_back(r); + mapRank2in[r] = 1; + } + } + cHyp_new.n_gamma = cHyp_old.n_gamma; + + for (size_t i = 0; i < repeat; ++i) { + unif = gsl_rng_uniform(gsl_r); + + if (unif < 0.40 && cHyp_new.n_gamma < s_max) { + flag_gamma = 1; + } else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) { + flag_gamma = 2; + } else if (unif >= 0.80 && cHyp_new.n_gamma > 0 && + cHyp_new.n_gamma < ns_test) { + flag_gamma = 3; + } else { + flag_gamma = 4; + } + + if (flag_gamma == 1) { + + // Add a SNP. + do { + r_add = gsl_ran_discrete(gsl_r, gsl_t); + } while (mapRank2in.count(r_add) != 0); + + double prob_total = 1.0; + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + r = rank_new[i]; + prob_total -= p_gamma[r]; + } + + mapRank2in[r_add] = 1; + rank_new.push_back(r_add); + cHyp_new.n_gamma++; + logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma); + } else if (flag_gamma == 2) { + + // Delete a SNP. + col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); + r_remove = rank_new[col_id]; + + double prob_total = 1.0; + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + r = rank_new[i]; + prob_total -= p_gamma[r]; + } + prob_total += p_gamma[r_remove]; + + mapRank2in.erase(r_remove); + rank_new.erase(rank_new.begin() + col_id); + logp += + log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma); + cHyp_new.n_gamma--; + } else if (flag_gamma == 3) { + + // Switch a SNP. + col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); + r_remove = rank_new[col_id]; + + // Be careful with the proposal. + do { + r_add = gsl_ran_discrete(gsl_r, gsl_t); + } while (mapRank2in.count(r_add) != 0); + + double prob_total = 1.0; + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + r = rank_new[i]; + prob_total -= p_gamma[r]; + } + + logp += log(p_gamma[r_remove] / + (prob_total + p_gamma[r_remove] - p_gamma[r_add])); + logp -= log(p_gamma[r_add] / prob_total); + + mapRank2in.erase(r_remove); + mapRank2in[r_add] = 1; + rank_new.erase(rank_new.begin() + col_id); + rank_new.push_back(r_add); + } else { + logp += 0; + } // Do not change. + } + + stable_sort(rank_new.begin(), rank_new.end(), comp_vec); -double BSLMM::ProposeGamma (const vector &rank_old, - vector &rank_new, - const double *p_gamma, - const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, - const size_t &repeat) { - map mapRank2in; - size_t r; - double unif, logp=0.0; - int flag_gamma; - size_t r_add, r_remove, col_id; - - rank_new.clear(); - if (cHyp_old.n_gamma!=rank_old.size()) {cout<<"size wrong"<=0.40 && unif < 0.80 && - cHyp_new.n_gamma>s_min) { - flag_gamma=2; - } - else if (unif>=0.80 && cHyp_new.n_gamma>0 && - cHyp_new.n_gamma a, pair b) { - return (a.second > b.second); +bool comp_lr(pair a, pair b) { + return (a.second > b.second); } // If a_mode==13 then Uty==y. -void BSLMM::MCMC (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const gsl_vector *y) { - clock_t time_start; - - class HYPBSLMM cHyp_old, cHyp_new; - - gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6); - gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); - - gsl_vector *alpha_prime=gsl_vector_alloc (ni_test); - gsl_vector *alpha_new=gsl_vector_alloc (ni_test); - gsl_vector *alpha_old=gsl_vector_alloc (ni_test); - gsl_vector *Utu=gsl_vector_alloc (ni_test); - gsl_vector *Utu_new=gsl_vector_alloc (ni_test); - gsl_vector *Utu_old=gsl_vector_alloc (ni_test); - - gsl_vector *UtXb_new=gsl_vector_alloc (ni_test); - gsl_vector *UtXb_old=gsl_vector_alloc (ni_test); - - gsl_vector *z_hat=gsl_vector_alloc (ni_test); - gsl_vector *z=gsl_vector_alloc (ni_test); - gsl_vector *Utz=gsl_vector_alloc (ni_test); - - gsl_vector_memcpy (Utz, Uty); - - double logPost_new, logPost_old; - double logMHratio; - double mean_z=0.0; - - gsl_matrix_set_zero (Result_gamma); - gsl_vector_set_zero (Utu); - gsl_vector_set_zero (alpha_prime); - if (a_mode==13) { - pheno_mean=0.0; - } - - vector > beta_g; - for (size_t i=0; i rank_new, rank_old; - vector beta_new, beta_old; - - vector > pos_loglr; - - time_start=clock(); - MatrixCalcLR (U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr); - time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr); - for (size_t i=0; itm_hour%24*3600+ - ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - double *p_gamma = new double[ns_test]; - CalcPgamma (p_gamma); - - gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - - // Initial parameters. - InitialMCMC (UtX, Utz, rank_old, cHyp_old, pos_loglr); - - cHyp_initial=cHyp_old; - - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, - cHyp_old); - - beta_old.clear(); - for (size_t i=0; isize; ++i) { - beta_old.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - - // Calculate centered z_hat, and pve. - if (a_mode==13) { - time_start=clock(); - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - CalcCC_PVEnZ (U, Utu_old, z_hat, cHyp_old); - } - else { - CalcCC_PVEnZ (U, UtXb_old, Utu_old, z_hat, cHyp_old); - } - time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - } - - // Start MCMC. - int accept; - size_t total_step=w_step+s_step; - size_t w=0, w_col, pos; - size_t repeat=0; - - for (size_t t=0; tsize; ++i) { - beta_old.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - } - - // M-H steps. - for (size_t i=0; isize; ++i) { - beta_new.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - - logMHratio+=logPost_new-logPost_old; - - if (logMHratio>0 || - log(gsl_rng_uniform(gsl_r))> beta_g; + for (size_t i = 0; i < ns_test; i++) { + beta_g.push_back(make_pair(0.0, 0.0)); + } + + vector rank_new, rank_old; + vector beta_new, beta_old; + + vector> pos_loglr; + + time_start = clock(); + MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr); + time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); + for (size_t i = 0; i < ns_test; ++i) { + mapRank2pos[i] = pos_loglr[i].first; + } + + // Calculate proposal distribution for gamma (unnormalized), + // and set up gsl_r and gsl_t. + gsl_rng_env_setup(); + const gsl_rng_type *gslType; + gslType = gsl_rng_default; + if (randseed < 0) { + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + randseed = + (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(gslType); + gsl_rng_set(gsl_r, randseed); + + double *p_gamma = new double[ns_test]; + CalcPgamma(p_gamma); + + gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma); + + // Initial parameters. + InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr); + + cHyp_initial = cHyp_old; + + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); + + beta_old.clear(); + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + beta_old.push_back(0); + } + } else { + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma); + gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma); + SetXgamma(UtXgamma, UtX, rank_old); + logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, + alpha_old, beta, cHyp_old); + + beta_old.clear(); + for (size_t i = 0; i < beta->size; ++i) { + beta_old.push_back(gsl_vector_get(beta, i)); + } + gsl_matrix_free(UtXgamma); + gsl_vector_free(beta); + } + + // Calculate centered z_hat, and pve. + if (a_mode == 13) { + time_start = clock(); + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old); + } + time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + + // Start MCMC. + int accept; + size_t total_step = w_step + s_step; + size_t w = 0, w_col, pos; + size_t repeat = 0; + + for (size_t t = 0; t < total_step; ++t) { + if (t % d_pace == 0 || t == total_step - 1) { + ProgressBar("Running MCMC ", t, total_step - 1, + (double)n_accept / (double)(t * n_mh + 1)); + } + + if (a_mode == 13) { + SampleZ(y, z_hat, z); + mean_z = CenterVector(z); + + time_start = clock(); + gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz); + time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + // First proposal. + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); + beta_old.clear(); + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + beta_old.push_back(0); + } + } else { + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma); + gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma); + SetXgamma(UtXgamma, UtX, rank_old); + logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, + alpha_old, beta, cHyp_old); + + beta_old.clear(); + for (size_t i = 0; i < beta->size; ++i) { + beta_old.push_back(gsl_vector_get(beta, i)); + } + gsl_matrix_free(UtXgamma); + gsl_vector_free(beta); + } + } + + // M-H steps. + for (size_t i = 0; i < n_mh; ++i) { + if (gsl_rng_uniform(gsl_r) < 0.33) { + repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); + } else { + repeat = 1; + } + + logMHratio = 0.0; + logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat); + logMHratio += + ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); + logMHratio += ProposePi(cHyp_old, cHyp_new, repeat); + + if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) { + logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new); + beta_new.clear(); + for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { + beta_new.push_back(0); + } + } else { + gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma); + gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma); + SetXgamma(UtXgamma, UtX, rank_new); + logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new, + alpha_new, beta, cHyp_new); + beta_new.clear(); + for (size_t i = 0; i < beta->size; ++i) { + beta_new.push_back(gsl_vector_get(beta, i)); + } + gsl_matrix_free(UtXgamma); + gsl_vector_free(beta); + } + + logMHratio += logPost_new - logPost_old; + + if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) { + accept = 1; + n_accept++; + } else { + accept = 0; + } + + if (accept == 1) { + logPost_old = logPost_new; + rank_old.clear(); + beta_old.clear(); + if (rank_new.size() != 0) { + for (size_t i = 0; i < rank_new.size(); ++i) { + rank_old.push_back(rank_new[i]); + beta_old.push_back(beta_new[i]); + } + } + cHyp_old = cHyp_new; + gsl_vector_memcpy(alpha_old, alpha_new); + gsl_vector_memcpy(UtXb_old, UtXb_new); + gsl_vector_memcpy(Utu_old, Utu_new); + } else { + cHyp_new = cHyp_old; + } + } + + // Calculate z_hat, and pve. + if (a_mode == 13) { + time_start = clock(); + if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { + CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old); + } + + // Sample mu and update z_hat. + gsl_vector_sub(z, z_hat); + mean_z += CenterVector(z); + mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test)); + gsl_vector_add_constant(z_hat, mean_z); + + time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + + // Save data. + if (t < w_step) { + continue; + } else { + if (t % r_pace == 0) { + w_col = w % w_pace; + if (w_col == 0) { + if (w == 0) { + WriteResult(0, Result_hyp, Result_gamma, w_col); + } else { + WriteResult(1, Result_hyp, Result_gamma, w_col); + gsl_matrix_set_zero(Result_hyp); + gsl_matrix_set_zero(Result_gamma); + } + } + + gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h); + gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve); + gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho); + gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge); + gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp); + gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma); + + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + pos = mapRank2pos[rank_old[i]] + 1; + + gsl_matrix_set(Result_gamma, w_col, i, pos); + + beta_g[pos - 1].first += beta_old[i]; + beta_g[pos - 1].second += 1.0; + } + + gsl_vector_add(alpha_prime, alpha_old); + gsl_vector_add(Utu, Utu_old); + + if (a_mode == 13) { + pheno_mean += mean_z; + } + + w++; + } + } + } + cout << endl; + + w_col = w % w_pace; + WriteResult(1, Result_hyp, Result_gamma, w_col); + + gsl_matrix_free(Result_hyp); + gsl_matrix_free(Result_gamma); + + gsl_vector_free(z_hat); + gsl_vector_free(z); + gsl_vector_free(Utz); + gsl_vector_free(UtXb_new); + gsl_vector_free(UtXb_old); + gsl_vector_free(alpha_new); + gsl_vector_free(alpha_old); + gsl_vector_free(Utu_new); + gsl_vector_free(Utu_old); + + gsl_vector_scale(alpha_prime, 1.0 / (double)w); + gsl_vector_scale(Utu, 1.0 / (double)w); + if (a_mode == 13) { + pheno_mean /= (double)w; + } + + gsl_vector *alpha = gsl_vector_alloc(ns_test); + gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0, + alpha); + WriteParam(beta_g, alpha, w); + gsl_vector_free(alpha); + + gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime); + WriteBV(alpha_prime); + + gsl_vector_free(alpha_prime); + gsl_vector_free(Utu); + + delete[] p_gamma; + beta_g.clear(); + + return; } void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *eval, - const double lambda) { - gsl_vector *beta=gsl_vector_alloc (UtX->size2); - gsl_vector *H_eval=gsl_vector_alloc (Uty->size); - gsl_vector *bv=gsl_vector_alloc (Uty->size); + const gsl_vector *Uty, const gsl_vector *eval, + const double lambda) { + gsl_vector *beta = gsl_vector_alloc(UtX->size2); + gsl_vector *H_eval = gsl_vector_alloc(Uty->size); + gsl_vector *bv = gsl_vector_alloc(Uty->size); - gsl_vector_memcpy (H_eval, eval); - gsl_vector_scale (H_eval, lambda); - gsl_vector_add_constant (H_eval, 1.0); + gsl_vector_memcpy(H_eval, eval); + gsl_vector_scale(H_eval, lambda); + gsl_vector_add_constant(H_eval, 1.0); - gsl_vector_memcpy (bv, Uty); - gsl_vector_div (bv, H_eval); + gsl_vector_memcpy(bv, Uty); + gsl_vector_div(bv, H_eval); - gsl_blas_dgemv (CblasTrans, lambda/(double)UtX->size2, - UtX, bv, 0.0, beta); - gsl_vector_add_constant (H_eval, -1.0); - gsl_vector_mul (H_eval, bv); - gsl_blas_dgemv (CblasNoTrans, 1.0, U, H_eval, 0.0, bv); + gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta); + gsl_vector_add_constant(H_eval, -1.0); + gsl_vector_mul(H_eval, bv); + gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv); - WriteParam (beta); - WriteBV(bv); + WriteParam(beta); + WriteBV(bv); - gsl_vector_free (H_eval); - gsl_vector_free (beta); - gsl_vector_free (bv); + gsl_vector_free(H_eval); + gsl_vector_free(beta); + gsl_vector_free(bv); - return; + return; } // Below fits MCMC for rho=1. -void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, - const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { - time_t time_start=clock(); - gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, - s_size); - gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); - gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size); - - lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, - &X_sub.matrix, 0.0, &XtX_sub.matrix); +void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y, + const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { + time_t time_start = clock(); + gsl_matrix_const_view X_sub = + gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size); + gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); + gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size); + + lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, + &XtX_sub.matrix); gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector); - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); return; } -void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, - const gsl_matrix *XtX_old, const gsl_vector *Xty_old, - const gsl_vector *y, const vector &rank_old, - const vector &rank_new, gsl_matrix *X_new, - gsl_matrix *XtX_new, gsl_vector *Xty_new) { +void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old, + const gsl_matrix *XtX_old, const gsl_vector *Xty_old, + const gsl_vector *y, const vector &rank_old, + const vector &rank_new, gsl_matrix *X_new, + gsl_matrix *XtX_new, gsl_vector *Xty_new) { double d; // rank_old and rank_new are sorted already inside PorposeGamma // calculate vectors rank_remove and rank_add. // make sure that v_size is larger than repeat. - size_t v_size=20; + size_t v_size = 20; vector rank_remove(v_size), rank_add(v_size), - rank_union(s_max+v_size); + rank_union(s_max + v_size); vector::iterator it; - it=set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(), - rank_new.end(), rank_remove.begin()); - rank_remove.resize(it-rank_remove.begin()); + it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(), + rank_new.end(), rank_remove.begin()); + rank_remove.resize(it - rank_remove.begin()); - it=set_difference (rank_new.begin(), rank_new.end(), rank_old.begin(), - rank_old.end(), rank_add.begin()); - rank_add.resize(it-rank_add.begin()); + it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(), + rank_old.end(), rank_add.begin()); + rank_add.resize(it - rank_add.begin()); - it=set_union (rank_new.begin(), rank_new.end(), rank_old.begin(), - rank_old.end(), rank_union.begin()); - rank_union.resize(it-rank_union.begin()); + it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(), + rank_old.end(), rank_union.begin()); + rank_union.resize(it - rank_union.begin()); // Map rank_remove and rank_add. map mapRank2in_remove, mapRank2in_add; - for (size_t i=0; isize1, rank_old.size()); - gsl_matrix_const_view XtXold_sub= - gsl_matrix_const_submatrix(XtX_old, 0, 0, rank_old.size(), - rank_old.size()); - gsl_vector_const_view Xtyold_sub= - gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); - - gsl_matrix_view Xnew_sub= - gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); - gsl_matrix_view XtXnew_sub= - gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); - gsl_vector_view Xtynew_sub= - gsl_vector_subvector(Xty_new, 0, rank_new.size()); + gsl_matrix_const_view Xold_sub = + gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size()); + gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix( + XtX_old, 0, 0, rank_old.size(), rank_old.size()); + gsl_vector_const_view Xtyold_sub = + gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); + + gsl_matrix_view Xnew_sub = + gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); + gsl_matrix_view XtXnew_sub = + gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtynew_sub = + gsl_vector_subvector(Xty_new, 0, rank_new.size()); // Get X_new and calculate XtX_new. - if (rank_remove.size()==0 && rank_add.size()==0) { + if (rank_remove.size() == 0 && rank_add.size() == 0) { gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix); gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix); gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector); } else { size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag; - if (rank_add.size()==0) { - i_old=0; i_new=0; - for (size_t i=0; isize1, rank_add.size() ); - gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2); - gsl_matrix *XtX_ao=gsl_matrix_alloc(X_add->size2, X_old->size2); - gsl_vector *Xty_add=gsl_vector_alloc(X_add->size2); + gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size()); + gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2); + gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2); + gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2); // Get X_add. - SetXgamma (X_add, X, rank_add); + SetXgamma(X_add, X, rank_add); // Get t(X_add)X_add and t(X_add)X_temp. - clock_t time_start=clock(); + clock_t time_start = clock(); // Somehow the lapack_dgemm does not work here. - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, - 0.0, XtX_aa); - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old, - 0.0, XtX_ao); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); + gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao); gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add); - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Save to X_new, XtX_new and Xty_new. - i_old=0; i_new=0; i_add=0; - for (size_t i=0; isize1, s_size); - gsl_matrix_const_view XtX_sub= - gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size); - gsl_vector_const_view Xty_sub= - gsl_vector_const_subvector (Xty, 0, s_size); - - gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size); - gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size); - gsl_vector *beta_hat=gsl_vector_alloc (s_size); - gsl_vector *Xty_temp=gsl_vector_alloc (s_size); - - gsl_vector_memcpy (Xty_temp, &Xty_sub.vector); - - // Calculate Omega. - gsl_matrix_memcpy (Omega, &XtX_sub.matrix); - gsl_matrix_scale (Omega, sigma_a2); - gsl_matrix_set_identity (M_temp); - gsl_matrix_add (Omega, M_temp); - - // Calculate beta_hat. - logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat); - gsl_vector_scale (beta_hat, sigma_a2); - - gsl_blas_ddot (Xty_temp, beta_hat, &d); - P_yy-=d; - - // Sample tau. - double tau=1.0; - if (a_mode==11) { - tau = gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); - } - - // Sample beta. - for (size_t i=0; isize1, s_size); + gsl_matrix_const_view XtX_sub = + gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size); + gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size); + + gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size); + gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size); + gsl_vector *beta_hat = gsl_vector_alloc(s_size); + gsl_vector *Xty_temp = gsl_vector_alloc(s_size); + + gsl_vector_memcpy(Xty_temp, &Xty_sub.vector); + + // Calculate Omega. + gsl_matrix_memcpy(Omega, &XtX_sub.matrix); + gsl_matrix_scale(Omega, sigma_a2); + gsl_matrix_set_identity(M_temp); + gsl_matrix_add(Omega, M_temp); + + // Calculate beta_hat. + logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat); + gsl_vector_scale(beta_hat, sigma_a2); + + gsl_blas_ddot(Xty_temp, beta_hat, &d); + P_yy -= d; + + // Sample tau. + double tau = 1.0; + if (a_mode == 11) { + tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy); + } + + // Sample beta. + for (size_t i = 0; i < s_size; i++) { + d = gsl_ran_gaussian(gsl_r, 1); + gsl_vector_set(beta, i, d); + } + gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size); + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, + &beta_sub.vector); + + // This computes inv(L^T(Omega)) %*% beta. + gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau)); + gsl_vector_add(&beta_sub.vector, beta_hat); + gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, + Xb); + + // For quantitative traits, calculate pve and pge. + if (a_mode == 11) { + gsl_blas_ddot(Xb, Xb, &d); + cHyp.pve = d / (double)ni_test; + cHyp.pve /= cHyp.pve + 1.0 / tau; + cHyp.pge = 1.0; + } + + logpost = -0.5 * logdet_O; + if (a_mode == 11) { + logpost -= 0.5 * (double)ni_test * log(P_yy); + } else { + logpost -= 0.5 * P_yy; + } + + logpost += + ((double)cHyp.n_gamma - 1.0) * cHyp.logp + + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); + + gsl_matrix_free(Omega); + gsl_matrix_free(M_temp); + gsl_vector_free(beta_hat); + gsl_vector_free(Xty_temp); + + return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp) -{ +void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) { gsl_vector_set_zero(z_hat); - cHyp.pve=0.0; - cHyp.pge=1.0; + cHyp.pve = 0.0; + cHyp.pge = 1.0; return; } // Calculate pve and pge, and calculate z_hat for case-control data. -void BSLMM::CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, - class HYPBSLMM &cHyp) { - double d; +void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat, + class HYPBSLMM &cHyp) { + double d; - gsl_blas_ddot (Xb, Xb, &d); - cHyp.pve=d/(double)ni_test; - cHyp.pve/=cHyp.pve+1.0; - cHyp.pge=1.0; + gsl_blas_ddot(Xb, Xb, &d); + cHyp.pve = d / (double)ni_test; + cHyp.pve /= cHyp.pve + 1.0; + cHyp.pge = 1.0; - gsl_vector_memcpy (z_hat, Xb); + gsl_vector_memcpy(z_hat, Xb); - return; + return; } // If a_mode==13, then run probit model. -void BSLMM::MCMC (const gsl_matrix *X, const gsl_vector *y) { - clock_t time_start; - double time_set=0, time_post=0; - - class HYPBSLMM cHyp_old, cHyp_new; - - gsl_matrix *Result_hyp=gsl_matrix_alloc (w_pace, 6); - gsl_matrix *Result_gamma=gsl_matrix_alloc (w_pace, s_max); - - gsl_vector *Xb_new=gsl_vector_alloc (ni_test); - gsl_vector *Xb_old=gsl_vector_alloc (ni_test); - gsl_vector *z_hat=gsl_vector_alloc (ni_test); - gsl_vector *z=gsl_vector_alloc (ni_test); - - gsl_matrix *Xgamma_old=gsl_matrix_alloc (ni_test, s_max); - gsl_matrix *XtX_old=gsl_matrix_alloc (s_max, s_max); - gsl_vector *Xtz_old=gsl_vector_alloc (s_max); - gsl_vector *beta_old=gsl_vector_alloc (s_max); - - gsl_matrix *Xgamma_new=gsl_matrix_alloc (ni_test, s_max); - gsl_matrix *XtX_new=gsl_matrix_alloc (s_max, s_max); - gsl_vector *Xtz_new=gsl_vector_alloc (s_max); - gsl_vector *beta_new=gsl_vector_alloc (s_max); - - double ztz=0.0; - gsl_vector_memcpy (z, y); - - // For quantitative traits, y is centered already in - // gemma.cpp, but just in case. - double mean_z=CenterVector (z); - gsl_blas_ddot(z, z, &ztz); - - double logPost_new, logPost_old; - double logMHratio; - - gsl_matrix_set_zero (Result_gamma); - if (a_mode==13) { - pheno_mean=0.0; - } - - vector > beta_g; - for (size_t i=0; i rank_new, rank_old; - vector > pos_loglr; - - time_start=clock(); - MatrixCalcLmLR (X, z, pos_loglr); - time_Proposal=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - stable_sort (pos_loglr.begin(), pos_loglr.end(), comp_lr); - for (size_t i=0; itm_hour%24*3600+ - ptm->tm_min*60+ptm->tm_sec); - } - gsl_r = gsl_rng_alloc(gslType); - gsl_rng_set(gsl_r, randseed); - - double *p_gamma = new double[ns_test]; - CalcPgamma (p_gamma); - - gsl_t=gsl_ran_discrete_preproc (ns_test, p_gamma); - - // Initial parameters. - InitialMCMC (X, z, rank_old, cHyp_old, pos_loglr); - - cHyp_initial=cHyp_old; - - if (cHyp_old.n_gamma==0) { - logPost_old=CalcPosterior (ztz, cHyp_old); - } - else { - SetXgamma (Xgamma_old, X, rank_old); - CalcXtX (Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); - logPost_old=CalcPosterior (Xgamma_old, XtX_old, Xtz_old, ztz, - rank_old.size(), Xb_old, beta_old, - cHyp_old); - } - - // Calculate centered z_hat, and pve. - if (a_mode==13) { - if (cHyp_old.n_gamma==0) { - CalcCC_PVEnZ (z_hat, cHyp_old); - } - else { - CalcCC_PVEnZ (Xb_old, z_hat, cHyp_old); - } - } - - // Start MCMC. - int accept; - size_t total_step=w_step+s_step; - size_t w=0, w_col, pos; - size_t repeat=0; - - for (size_t t=0; t0 || - log(gsl_rng_uniform(gsl_r))> beta_g; + for (size_t i = 0; i < ns_test; i++) { + beta_g.push_back(make_pair(0.0, 0.0)); + } + + vector rank_new, rank_old; + vector> pos_loglr; + + time_start = clock(); + MatrixCalcLmLR(X, z, pos_loglr); + time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + + stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); + for (size_t i = 0; i < ns_test; ++i) { + mapRank2pos[i] = pos_loglr[i].first; + } + + // Calculate proposal distribution for gamma (unnormalized), + // and set up gsl_r and gsl_t. + gsl_rng_env_setup(); + const gsl_rng_type *gslType; + gslType = gsl_rng_default; + if (randseed < 0) { + time_t rawtime; + time(&rawtime); + tm *ptm = gmtime(&rawtime); + + randseed = + (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); + } + gsl_r = gsl_rng_alloc(gslType); + gsl_rng_set(gsl_r, randseed); + + double *p_gamma = new double[ns_test]; + CalcPgamma(p_gamma); + + gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma); + + // Initial parameters. + InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr); + + cHyp_initial = cHyp_old; + + if (cHyp_old.n_gamma == 0) { + logPost_old = CalcPosterior(ztz, cHyp_old); + } else { + SetXgamma(Xgamma_old, X, rank_old); + CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); + logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, + rank_old.size(), Xb_old, beta_old, cHyp_old); + } + + // Calculate centered z_hat, and pve. + if (a_mode == 13) { + if (cHyp_old.n_gamma == 0) { + CalcCC_PVEnZ(z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old); + } + } + + // Start MCMC. + int accept; + size_t total_step = w_step + s_step; + size_t w = 0, w_col, pos; + size_t repeat = 0; + + for (size_t t = 0; t < total_step; ++t) { + if (t % d_pace == 0 || t == total_step - 1) { + ProgressBar("Running MCMC ", t, total_step - 1, + (double)n_accept / (double)(t * n_mh + 1)); + } + + if (a_mode == 13) { + SampleZ(y, z_hat, z); + mean_z = CenterVector(z); + gsl_blas_ddot(z, z, &ztz); + + // First proposal. + if (cHyp_old.n_gamma == 0) { + logPost_old = CalcPosterior(ztz, cHyp_old); + } else { + gsl_matrix_view Xold_sub = + gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size()); + gsl_vector_view Xtz_sub = + gsl_vector_subvector(Xtz_old, 0, rank_old.size()); + gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0, + &Xtz_sub.vector); + logPost_old = + CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), + Xb_old, beta_old, cHyp_old); + } + } + + // M-H steps. + for (size_t i = 0; i < n_mh; ++i) { + if (gsl_rng_uniform(gsl_r) < 0.33) { + repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); + } else { + repeat = 1; + } + + logMHratio = 0.0; + logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat); + logMHratio += + ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); + logMHratio += ProposePi(cHyp_old, cHyp_new, repeat); + + if (cHyp_new.n_gamma == 0) { + logPost_new = CalcPosterior(ztz, cHyp_new); + } else { + + // This makes sure that rank_old.size() == + // rank_remove.size() does not happen. + if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) { + time_start = clock(); + SetXgamma(Xgamma_new, X, rank_new); + CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new); + time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } else { + time_start = clock(); + SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new, + Xgamma_new, XtX_new, Xtz_new); + time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + time_start = clock(); + logPost_new = + CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(), + Xb_new, beta_new, cHyp_new); + time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); + } + logMHratio += logPost_new - logPost_old; + + if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) { + accept = 1; + n_accept++; + } else { + accept = 0; + } + + if (accept == 1) { + logPost_old = logPost_new; + cHyp_old = cHyp_new; + gsl_vector_memcpy(Xb_old, Xb_new); + + rank_old.clear(); + if (rank_new.size() != 0) { + for (size_t i = 0; i < rank_new.size(); ++i) { + rank_old.push_back(rank_new[i]); + } + + gsl_matrix_view Xold_sub = + gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size()); + gsl_matrix_view XtXold_sub = gsl_matrix_submatrix( + XtX_old, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtzold_sub = + gsl_vector_subvector(Xtz_old, 0, rank_new.size()); + gsl_vector_view betaold_sub = + gsl_vector_subvector(beta_old, 0, rank_new.size()); + + gsl_matrix_view Xnew_sub = + gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size()); + gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix( + XtX_new, 0, 0, rank_new.size(), rank_new.size()); + gsl_vector_view Xtznew_sub = + gsl_vector_subvector(Xtz_new, 0, rank_new.size()); + gsl_vector_view betanew_sub = + gsl_vector_subvector(beta_new, 0, rank_new.size()); + + gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix); + gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix); + gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector); + gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector); + } + } else { + cHyp_new = cHyp_old; + } + } + + // Calculate z_hat, and pve. + if (a_mode == 13) { + if (cHyp_old.n_gamma == 0) { + CalcCC_PVEnZ(z_hat, cHyp_old); + } else { + CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old); + } + + // Sample mu and update z_hat. + gsl_vector_sub(z, z_hat); + mean_z += CenterVector(z); + mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test)); + + gsl_vector_add_constant(z_hat, mean_z); + } + + // Save data. + if (t < w_step) { + continue; + } else { + if (t % r_pace == 0) { + w_col = w % w_pace; + if (w_col == 0) { + if (w == 0) { + WriteResult(0, Result_hyp, Result_gamma, w_col); + } else { + WriteResult(1, Result_hyp, Result_gamma, w_col); + gsl_matrix_set_zero(Result_hyp); + gsl_matrix_set_zero(Result_gamma); + } + } + + gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h); + gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve); + gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho); + gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge); + gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp); + gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma); + + for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { + pos = mapRank2pos[rank_old[i]] + 1; + gsl_matrix_set(Result_gamma, w_col, i, pos); + + beta_g[pos - 1].first += gsl_vector_get(beta_old, i); + beta_g[pos - 1].second += 1.0; + } + + if (a_mode == 13) { + pheno_mean += mean_z; + } + + w++; + } + } + } + cout << endl; + + cout << "time on selecting Xgamma: " << time_set << endl; + cout << "time on calculating posterior: " << time_post << endl; + + w_col = w % w_pace; + WriteResult(1, Result_hyp, Result_gamma, w_col); + + gsl_vector *alpha = gsl_vector_alloc(ns_test); + gsl_vector_set_zero(alpha); + WriteParam(beta_g, alpha, w); + gsl_vector_free(alpha); + + gsl_matrix_free(Result_hyp); + gsl_matrix_free(Result_gamma); + + gsl_vector_free(z_hat); + gsl_vector_free(z); + gsl_vector_free(Xb_new); + gsl_vector_free(Xb_old); + + gsl_matrix_free(Xgamma_old); + gsl_matrix_free(XtX_old); + gsl_vector_free(Xtz_old); + gsl_vector_free(beta_old); + + gsl_matrix_free(Xgamma_new); + gsl_matrix_free(XtX_new); + gsl_vector_free(Xtz_new); + gsl_vector_free(beta_new); + + delete[] p_gamma; + beta_g.clear(); + + return; } -- cgit v1.2.3