/* Genome-wide Efficient Mixed Model Association (GEMMA) Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_eigen.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_randist.h" #include "gsl/gsl_roots.h" #include "gsl/gsl_vector.h" #include "bslmm.h" #include "lapack.h" #include "lm.h" #include "lmm.h" #include "mathfunc.h" #include "param.h" using namespace std; void BSLMM::CopyFromParam(PARAM &cPar) { a_mode = cPar.a_mode; d_pace = cPar.d_pace; file_bfile = cPar.file_bfile; file_geno = cPar.file_geno; file_out = cPar.file_out; path_out = cPar.path_out; l_min = cPar.h_min; l_max = cPar.h_max; n_region = cPar.n_region; pve_null = cPar.pve_null; pheno_mean = cPar.pheno_mean; time_UtZ = 0.0; time_Omega = 0.0; n_accept = 0; h_min = cPar.h_min; h_max = cPar.h_max; h_scale = cPar.h_scale; rho_min = cPar.rho_min; rho_max = cPar.rho_max; rho_scale = cPar.rho_scale; logp_min = cPar.logp_min; logp_max = cPar.logp_max; logp_scale = cPar.logp_scale; s_min = cPar.s_min; s_max = cPar.s_max; w_step = cPar.w_step; s_step = cPar.s_step; r_pace = cPar.r_pace; w_pace = cPar.w_pace; n_mh = cPar.n_mh; geo_mean = cPar.geo_mean; randseed = cPar.randseed; trace_G = cPar.trace_G; ni_total = cPar.ni_total; ns_total = cPar.ns_total; ni_test = cPar.ni_test; ns_test = cPar.ns_test; n_cvt = cPar.n_cvt; indicator_idv = cPar.indicator_idv; indicator_snp = cPar.indicator_snp; snpInfo = cPar.snpInfo; return; } void BSLMM::CopyToParam(PARAM &cPar) { cPar.time_UtZ = time_UtZ; cPar.time_Omega = time_Omega; cPar.time_Proposal = time_Proposal; cPar.cHyp_initial = cHyp_initial; cPar.n_accept = n_accept; cPar.pheno_mean = pheno_mean; cPar.randseed = randseed; return; } void BSLMM::WriteBV(const gsl_vector *bv) { string file_str; file_str = path_out + "/" + file_out; file_str += ".bv.txt"; ofstream outfile(file_str.c_str(), ofstream::out); if (!outfile) { cout << "error writing file: " << file_str.c_str() << endl; return; } size_t t = 0; for (size_t i = 0; i < ni_total; ++i) { if (indicator_idv[i] == 0) { outfile << "NA" << endl; } else { outfile << scientific << setprecision(6) << gsl_vector_get(bv, t) << endl; t++; } } outfile.clear(); outfile.close(); return; } void BSLMM::WriteParam(vector> &beta_g, const gsl_vector *alpha, const size_t w) { string file_str; file_str = path_out + "/" + file_out; file_str += ".param.txt"; ofstream outfile(file_str.c_str(), ofstream::out); if (!outfile) { cout << "error writing file: " << file_str.c_str() << endl; return; } outfile << "chr" << "\t" << "rs" << "\t" << "ps" << "\t" << "n_miss" << "\t" << "alpha" << "\t" << "beta" << "\t" << "gamma" << endl; size_t t = 0; for (size_t i = 0; i < ns_total; ++i) { if (indicator_snp[i] == 0) { continue; } outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"; outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t) << "\t"; if (beta_g[t].second != 0) { outfile << beta_g[t].first / beta_g[t].second << "\t" << beta_g[t].second / (double)w << endl; } else { outfile << 0.0 << "\t" << 0.0 << endl; } t++; } outfile.clear(); outfile.close(); return; } void BSLMM::WriteParam(const gsl_vector *alpha) { string file_str; file_str = path_out + "/" + file_out; file_str += ".param.txt"; ofstream outfile(file_str.c_str(), ofstream::out); if (!outfile) { cout << "error writing file: " << file_str.c_str() << endl; return; } outfile << "chr" << "\t" << "rs" << "\t" << "ps" << "\t" << "n_miss" << "\t" << "alpha" << "\t" << "beta" << "\t" << "gamma" << endl; size_t t = 0; for (size_t i = 0; i < ns_total; ++i) { if (indicator_snp[i] == 0) { continue; } outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t" << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"; outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t) << "\t"; outfile << 0.0 << "\t" << 0.0 << endl; t++; } outfile.clear(); outfile.close(); return; } void BSLMM::WriteResult(const int flag, const gsl_matrix *Result_hyp, const gsl_matrix *Result_gamma, const size_t w_col) { string file_gamma, file_hyp; file_gamma = path_out + "/" + file_out; file_gamma += ".gamma.txt"; file_hyp = path_out + "/" + file_out; file_hyp += ".hyp.txt"; ofstream outfile_gamma, outfile_hyp; if (flag == 0) { outfile_gamma.open(file_gamma.c_str(), ofstream::out); outfile_hyp.open(file_hyp.c_str(), ofstream::out); if (!outfile_gamma) { cout << "error writing file: " << file_gamma << endl; return; } if (!outfile_hyp) { cout << "error writing file: " << file_hyp << endl; return; } outfile_hyp << "h \t pve \t rho \t pge \t pi \t n_gamma" << endl; for (size_t i = 0; i < s_max; ++i) { outfile_gamma << "s" << i << "\t"; } outfile_gamma << endl; } else { outfile_gamma.open(file_gamma.c_str(), ofstream::app); outfile_hyp.open(file_hyp.c_str(), ofstream::app); if (!outfile_gamma) { cout << "error writing file: " << file_gamma << endl; return; } if (!outfile_hyp) { cout << "error writing file: " << file_hyp << endl; return; } size_t w; if (w_col == 0) { w = w_pace; } else { w = w_col; } for (size_t i = 0; i < w; ++i) { outfile_hyp << scientific; for (size_t j = 0; j < 4; ++j) { outfile_hyp << setprecision(6) << gsl_matrix_get(Result_hyp, i, j) << "\t"; } outfile_hyp << setprecision(6) << exp(gsl_matrix_get(Result_hyp, i, 4)) << "\t"; outfile_hyp << (int)gsl_matrix_get(Result_hyp, i, 5) << "\t"; outfile_hyp << endl; } for (size_t i = 0; i < w; ++i) { for (size_t j = 0; j < s_max; ++j) { outfile_gamma << (int)gsl_matrix_get(Result_gamma, i, j) << "\t"; } outfile_gamma << endl; } } outfile_hyp.close(); outfile_hyp.clear(); outfile_gamma.close(); outfile_gamma.clear(); return; } void BSLMM::CalcPgamma(double *p_gamma) { double p, s = 0.0; for (size_t i = 0; i < ns_test; ++i) { p = 0.7 * gsl_ran_geometric_pdf(i + 1, 1.0 / geo_mean) + 0.3 / (double)ns_test; p_gamma[i] = p; s += p; } for (size_t i = 0; i < ns_test; ++i) { p = p_gamma[i]; p_gamma[i] = p / s; } return; } void BSLMM::SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X, vector &rank) { size_t pos; for (size_t i = 0; i < rank.size(); ++i) { pos = mapRank2pos[rank[i]]; gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i); gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos); gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector); } return; } double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty, const double sigma_a2) { double pve, var_y; gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2); gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2); gsl_matrix_set_identity(Omega); gsl_matrix_scale(Omega, 1.0 / sigma_a2); lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega); gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); CholeskySolve(Omega, Xty, OiXty); gsl_blas_ddot(Xty, OiXty, &pve); gsl_blas_ddot(Uty, Uty, &var_y); pve /= var_y; gsl_matrix_free(Omega); gsl_vector_free(Xty); gsl_vector_free(OiXty); return pve; } void BSLMM::InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty, vector &rank, class HYPBSLMM &cHyp, vector> &pos_loglr) { double q_genome = gsl_cdf_chisq_Qinv(0.05 / (double)ns_test, 1); cHyp.n_gamma = 0; for (size_t i = 0; i < pos_loglr.size(); ++i) { if (2.0 * pos_loglr[i].second > q_genome) { cHyp.n_gamma++; } } if (cHyp.n_gamma < 10) { cHyp.n_gamma = 10; } if (cHyp.n_gamma > s_max) { cHyp.n_gamma = s_max; } if (cHyp.n_gamma < s_min) { cHyp.n_gamma = s_min; } rank.clear(); for (size_t i = 0; i < cHyp.n_gamma; ++i) { rank.push_back(i); } cHyp.logp = log((double)cHyp.n_gamma / (double)ns_test); cHyp.h = pve_null; if (cHyp.logp == 0) { cHyp.logp = -0.000001; } if (cHyp.h == 0) { cHyp.h = 0.1; } gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp.n_gamma); SetXgamma(UtXgamma, UtX, rank); double sigma_a2; if (trace_G != 0) { sigma_a2 = cHyp.h * 1.0 / (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); } else { sigma_a2 = cHyp.h * 1.0 / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); } if (sigma_a2 == 0) { sigma_a2 = 0.025; } cHyp.rho = CalcPveLM(UtXgamma, Uty, sigma_a2) / cHyp.h; gsl_matrix_free(UtXgamma); if (cHyp.rho > 1.0) { cHyp.rho = 1.0; } if (cHyp.h < h_min) { cHyp.h = h_min; } if (cHyp.h > h_max) { cHyp.h = h_max; } if (cHyp.rho < rho_min) { cHyp.rho = rho_min; } if (cHyp.rho > rho_max) { cHyp.rho = rho_max; } if (cHyp.logp < logp_min) { cHyp.logp = logp_min; } if (cHyp.logp > logp_max) { cHyp.logp = logp_max; } cout << "initial value of h = " << cHyp.h << endl; cout << "initial value of rho = " << cHyp.rho << endl; cout << "initial value of pi = " << exp(cHyp.logp) << endl; cout << "initial value of |gamma| = " << cHyp.n_gamma << endl; return; } double BSLMM::CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval, gsl_vector *Utu, gsl_vector *alpha_prime, class HYPBSLMM &cHyp) { double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h)); gsl_vector *Utu_rand = gsl_vector_alloc(Uty->size); gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size); double logpost = 0.0; double d, ds, uy, Hi_yy = 0, logdet_H = 0.0; for (size_t i = 0; i < ni_test; ++i) { d = gsl_vector_get(K_eval, i) * sigma_b2; ds = d / (d + 1.0); d = 1.0 / (d + 1.0); gsl_vector_set(weight_Hi, i, d); logdet_H -= log(d); uy = gsl_vector_get(Uty, i); Hi_yy += d * uy * uy; gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds)); } // Sample tau. double tau = 1.0; if (a_mode == 11) { tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / Hi_yy); } // Sample alpha. gsl_vector_memcpy(alpha_prime, Uty); gsl_vector_mul(alpha_prime, weight_Hi); gsl_vector_scale(alpha_prime, sigma_b2); // Sample u. gsl_vector_memcpy(Utu, alpha_prime); gsl_vector_mul(Utu, K_eval); if (a_mode == 11) { gsl_vector_scale(Utu_rand, sqrt(1.0 / tau)); } gsl_vector_add(Utu, Utu_rand); // For quantitative traits, calculate pve and ppe. if (a_mode == 11) { gsl_blas_ddot(Utu, Utu, &d); cHyp.pve = d / (double)ni_test; cHyp.pve /= cHyp.pve + 1.0 / tau; cHyp.pge = 0.0; } // Calculate likelihood. logpost = -0.5 * logdet_H; if (a_mode == 11) { logpost -= 0.5 * (double)ni_test * log(Hi_yy); } else { logpost -= 0.5 * Hi_yy; } logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp + ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp)); gsl_vector_free(Utu_rand); gsl_vector_free(weight_Hi); return logpost; } double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty, const gsl_vector *K_eval, gsl_vector *UtXb, gsl_vector *Utu, gsl_vector *alpha_prime, gsl_vector *beta, class HYPBSLMM &cHyp) { clock_t time_start; double sigma_a2 = cHyp.h * cHyp.rho / (trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h)); double logpost = 0.0; double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0; gsl_matrix *UtXgamma_eval = gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2); gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2); gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2); gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2); gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1); gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1); gsl_matrix_memcpy(UtXgamma_eval, UtXgamma); logdet_H = 0.0; P_yy = 0.0; for (size_t i = 0; i < ni_test; ++i) { gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i); d = gsl_vector_get(K_eval, i) * sigma_b2; ds = d / (d + 1.0); d = 1.0 / (d + 1.0); gsl_vector_set(weight_Hi, i, d); logdet_H -= log(d); uy = gsl_vector_get(Uty, i); P_yy += d * uy * uy; gsl_vector_scale(&UtXgamma_row.vector, d); gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds)); } // Calculate Omega. gsl_matrix_set_identity(Omega); time_start = clock(); lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega); time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Calculate beta_hat. gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy); logdet_O = CholeskySolve(Omega, XtHiy, beta_hat); gsl_vector_scale(beta_hat, sigma_a2); gsl_blas_ddot(XtHiy, beta_hat, &d); P_yy -= d; // Sample tau. double tau = 1.0; if (a_mode == 11) { tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy); } // Sample beta. for (size_t i = 0; i < beta->size; i++) { d = gsl_ran_gaussian(gsl_r, 1); gsl_vector_set(beta, i, d); } gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta); // This computes inv(L^T(Omega)) %*% beta. gsl_vector_scale(beta, sqrt(sigma_a2 / tau)); gsl_vector_add(beta, beta_hat); gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb); // Sample alpha. gsl_vector_memcpy(alpha_prime, Uty); gsl_vector_sub(alpha_prime, UtXb); gsl_vector_mul(alpha_prime, weight_Hi); gsl_vector_scale(alpha_prime, sigma_b2); // Sample u. gsl_vector_memcpy(Utu, alpha_prime); gsl_vector_mul(Utu, K_eval); if (a_mode == 11) { gsl_vector_scale(Utu_rand, sqrt(1.0 / tau)); } gsl_vector_add(Utu, Utu_rand); // For quantitative traits, calculate pve and pge. if (a_mode == 11) { gsl_blas_ddot(UtXb, UtXb, &d); cHyp.pge = d / (double)ni_test; gsl_blas_ddot(Utu, Utu, &d); cHyp.pve = cHyp.pge + d / (double)ni_test; if (cHyp.pve == 0) { cHyp.pge = 0.0; } else { cHyp.pge /= cHyp.pve; } cHyp.pve /= cHyp.pve + 1.0 / tau; } gsl_matrix_free(UtXgamma_eval); gsl_matrix_free(Omega); gsl_vector_free(XtHiy); gsl_vector_free(beta_hat); gsl_vector_free(Utu_rand); gsl_vector_free(weight_Hi); logpost = -0.5 * logdet_H - 0.5 * logdet_O; if (a_mode == 11) { logpost -= 0.5 * (double)ni_test * log(P_yy); } else { logpost -= 0.5 * P_yy; } logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; gsl_blas_ddot(Utu, Utu, &d); cHyp.pve = d / (double)ni_test; gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat); cHyp.pve /= cHyp.pve + 1.0; cHyp.pge = 0.0; return; } // Calculate pve and pge, and calculate z_hat for case-control data. void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb, const gsl_vector *Utu, gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; gsl_vector *UtXbU = gsl_vector_alloc(Utu->size); gsl_blas_ddot(UtXb, UtXb, &d); cHyp.pge = d / (double)ni_test; gsl_blas_ddot(Utu, Utu, &d); cHyp.pve = cHyp.pge + d / (double)ni_test; gsl_vector_memcpy(UtXbU, Utu); gsl_vector_add(UtXbU, UtXb); gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat); if (cHyp.pve == 0) { cHyp.pge = 0.0; } else { cHyp.pge /= cHyp.pve; } cHyp.pve /= cHyp.pve + 1.0; gsl_vector_free(UtXbU); return; } void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z) { double d1, d2, z_rand = 0.0; for (size_t i = 0; i < z->size; ++i) { d1 = gsl_vector_get(y, i); d2 = gsl_vector_get(z_hat, i); // y is centered for case control studies. if (d1 <= 0.0) { // Control, right truncated. do { z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0); } while (z_rand > 0.0); } else { do { z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0); } while (z_rand < 0.0); } gsl_vector_set(z, i, z_rand); } return; } double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) { double h = cHyp_old.h, rho = cHyp_old.rho; double d_h = (h_max - h_min) * h_scale, d_rho = (rho_max - rho_min) * rho_scale; for (size_t i = 0; i < repeat; ++i) { h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h; if (h < h_min) { h = 2 * h_min - h; } if (h > h_max) { h = 2 * h_max - h; } rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho; if (rho < rho_min) { rho = 2 * rho_min - rho; } if (rho > rho_max) { rho = 2 * rho_max - rho; } } cHyp_new.h = h; cHyp_new.rho = rho; return 0.0; } double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) { double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp; double log_ratio = 0.0; double d_logp = min(0.1, (logp_max - logp_min) * logp_scale); for (size_t i = 0; i < repeat; ++i) { logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp; if (logp_new < logp_min) { logp_new = 2 * logp_min - logp_new; } if (logp_new > logp_max) { logp_new = 2 * logp_max - logp_new; } log_ratio += logp_new - logp_old; logp_old = logp_new; } cHyp_new.logp = logp_new; return log_ratio; } bool comp_vec(size_t a, size_t b) { return (a < b); } double BSLMM::ProposeGamma(const vector &rank_old, vector &rank_new, const double *p_gamma, const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, const size_t &repeat) { map mapRank2in; size_t r; double unif, logp = 0.0; int flag_gamma; size_t r_add, r_remove, col_id; rank_new.clear(); if (cHyp_old.n_gamma != rank_old.size()) { cout << "size wrong" << endl; } if (cHyp_old.n_gamma != 0) { for (size_t i = 0; i < rank_old.size(); ++i) { r = rank_old[i]; rank_new.push_back(r); mapRank2in[r] = 1; } } cHyp_new.n_gamma = cHyp_old.n_gamma; for (size_t i = 0; i < repeat; ++i) { unif = gsl_rng_uniform(gsl_r); if (unif < 0.40 && cHyp_new.n_gamma < s_max) { flag_gamma = 1; } else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) { flag_gamma = 2; } else if (unif >= 0.80 && cHyp_new.n_gamma > 0 && cHyp_new.n_gamma < ns_test) { flag_gamma = 3; } else { flag_gamma = 4; } if (flag_gamma == 1) { // Add a SNP. do { r_add = gsl_ran_discrete(gsl_r, gsl_t); } while (mapRank2in.count(r_add) != 0); double prob_total = 1.0; for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { r = rank_new[i]; prob_total -= p_gamma[r]; } mapRank2in[r_add] = 1; rank_new.push_back(r_add); cHyp_new.n_gamma++; logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma); } else if (flag_gamma == 2) { // Delete a SNP. col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); r_remove = rank_new[col_id]; double prob_total = 1.0; for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { r = rank_new[i]; prob_total -= p_gamma[r]; } prob_total += p_gamma[r_remove]; mapRank2in.erase(r_remove); rank_new.erase(rank_new.begin() + col_id); logp += log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma); cHyp_new.n_gamma--; } else if (flag_gamma == 3) { // Switch a SNP. col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma); r_remove = rank_new[col_id]; // Be careful with the proposal. do { r_add = gsl_ran_discrete(gsl_r, gsl_t); } while (mapRank2in.count(r_add) != 0); double prob_total = 1.0; for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { r = rank_new[i]; prob_total -= p_gamma[r]; } logp += log(p_gamma[r_remove] / (prob_total + p_gamma[r_remove] - p_gamma[r_add])); logp -= log(p_gamma[r_add] / prob_total); mapRank2in.erase(r_remove); mapRank2in[r_add] = 1; rank_new.erase(rank_new.begin() + col_id); rank_new.push_back(r_add); } else { logp += 0; } // Do not change. } stable_sort(rank_new.begin(), rank_new.end(), comp_vec); mapRank2in.clear(); return logp; } bool comp_lr(pair a, pair b) { return (a.second > b.second); } // If a_mode==13 then Uty==y. void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y) { clock_t time_start; class HYPBSLMM cHyp_old, cHyp_new; gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6); gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max); gsl_vector *alpha_prime = gsl_vector_alloc(ni_test); gsl_vector *alpha_new = gsl_vector_alloc(ni_test); gsl_vector *alpha_old = gsl_vector_alloc(ni_test); gsl_vector *Utu = gsl_vector_alloc(ni_test); gsl_vector *Utu_new = gsl_vector_alloc(ni_test); gsl_vector *Utu_old = gsl_vector_alloc(ni_test); gsl_vector *UtXb_new = gsl_vector_alloc(ni_test); gsl_vector *UtXb_old = gsl_vector_alloc(ni_test); gsl_vector *z_hat = gsl_vector_alloc(ni_test); gsl_vector *z = gsl_vector_alloc(ni_test); gsl_vector *Utz = gsl_vector_alloc(ni_test); gsl_vector_memcpy(Utz, Uty); double logPost_new, logPost_old; double logMHratio; double mean_z = 0.0; gsl_matrix_set_zero(Result_gamma); gsl_vector_set_zero(Utu); gsl_vector_set_zero(alpha_prime); if (a_mode == 13) { pheno_mean = 0.0; } vector> beta_g; for (size_t i = 0; i < ns_test; i++) { beta_g.push_back(make_pair(0.0, 0.0)); } vector rank_new, rank_old; vector beta_new, beta_old; vector> pos_loglr; time_start = clock(); MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr); time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); for (size_t i = 0; i < ns_test; ++i) { mapRank2pos[i] = pos_loglr[i].first; } // Calculate proposal distribution for gamma (unnormalized), // and set up gsl_r and gsl_t. gsl_rng_env_setup(); const gsl_rng_type *gslType; gslType = gsl_rng_default; if (randseed < 0) { time_t rawtime; time(&rawtime); tm *ptm = gmtime(&rawtime); randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); } gsl_r = gsl_rng_alloc(gslType); gsl_rng_set(gsl_r, randseed); double *p_gamma = new double[ns_test]; CalcPgamma(p_gamma); gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma); // Initial parameters. InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr); cHyp_initial = cHyp_old; if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); beta_old.clear(); for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { beta_old.push_back(0); } } else { gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma); gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma); SetXgamma(UtXgamma, UtX, rank_old); logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); beta_old.clear(); for (size_t i = 0; i < beta->size; ++i) { beta_old.push_back(gsl_vector_get(beta, i)); } gsl_matrix_free(UtXgamma); gsl_vector_free(beta); } // Calculate centered z_hat, and pve. if (a_mode == 13) { time_start = clock(); if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old); } else { CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old); } time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); } // Start MCMC. int accept; size_t total_step = w_step + s_step; size_t w = 0, w_col, pos; size_t repeat = 0; for (size_t t = 0; t < total_step; ++t) { if (t % d_pace == 0 || t == total_step - 1) { ProgressBar("Running MCMC ", t, total_step - 1, (double)n_accept / (double)(t * n_mh + 1)); } if (a_mode == 13) { SampleZ(y, z_hat, z); mean_z = CenterVector(z); time_start = clock(); gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz); time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // First proposal. if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); beta_old.clear(); for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { beta_old.push_back(0); } } else { gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma); gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma); SetXgamma(UtXgamma, UtX, rank_old); logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); beta_old.clear(); for (size_t i = 0; i < beta->size; ++i) { beta_old.push_back(gsl_vector_get(beta, i)); } gsl_matrix_free(UtXgamma); gsl_vector_free(beta); } } // M-H steps. for (size_t i = 0; i < n_mh; ++i) { if (gsl_rng_uniform(gsl_r) < 0.33) { repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); } else { repeat = 1; } logMHratio = 0.0; logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat); logMHratio += ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); logMHratio += ProposePi(cHyp_old, cHyp_new, repeat); if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) { logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new); beta_new.clear(); for (size_t i = 0; i < cHyp_new.n_gamma; ++i) { beta_new.push_back(0); } } else { gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma); gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma); SetXgamma(UtXgamma, UtX, rank_new); logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new, alpha_new, beta, cHyp_new); beta_new.clear(); for (size_t i = 0; i < beta->size; ++i) { beta_new.push_back(gsl_vector_get(beta, i)); } gsl_matrix_free(UtXgamma); gsl_vector_free(beta); } logMHratio += logPost_new - logPost_old; if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) { accept = 1; n_accept++; } else { accept = 0; } if (accept == 1) { logPost_old = logPost_new; rank_old.clear(); beta_old.clear(); if (rank_new.size() != 0) { for (size_t i = 0; i < rank_new.size(); ++i) { rank_old.push_back(rank_new[i]); beta_old.push_back(beta_new[i]); } } cHyp_old = cHyp_new; gsl_vector_memcpy(alpha_old, alpha_new); gsl_vector_memcpy(UtXb_old, UtXb_new); gsl_vector_memcpy(Utu_old, Utu_new); } else { cHyp_new = cHyp_old; } } // Calculate z_hat, and pve. if (a_mode == 13) { time_start = clock(); if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) { CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old); } else { CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old); } // Sample mu and update z_hat. gsl_vector_sub(z, z_hat); mean_z += CenterVector(z); mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test)); gsl_vector_add_constant(z_hat, mean_z); time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); } // Save data. if (t < w_step) { continue; } else { if (t % r_pace == 0) { w_col = w % w_pace; if (w_col == 0) { if (w == 0) { WriteResult(0, Result_hyp, Result_gamma, w_col); } else { WriteResult(1, Result_hyp, Result_gamma, w_col); gsl_matrix_set_zero(Result_hyp); gsl_matrix_set_zero(Result_gamma); } } gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h); gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve); gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho); gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge); gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp); gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma); for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { pos = mapRank2pos[rank_old[i]] + 1; gsl_matrix_set(Result_gamma, w_col, i, pos); beta_g[pos - 1].first += beta_old[i]; beta_g[pos - 1].second += 1.0; } gsl_vector_add(alpha_prime, alpha_old); gsl_vector_add(Utu, Utu_old); if (a_mode == 13) { pheno_mean += mean_z; } w++; } } } cout << endl; w_col = w % w_pace; WriteResult(1, Result_hyp, Result_gamma, w_col); gsl_matrix_free(Result_hyp); gsl_matrix_free(Result_gamma); gsl_vector_free(z_hat); gsl_vector_free(z); gsl_vector_free(Utz); gsl_vector_free(UtXb_new); gsl_vector_free(UtXb_old); gsl_vector_free(alpha_new); gsl_vector_free(alpha_old); gsl_vector_free(Utu_new); gsl_vector_free(Utu_old); gsl_vector_scale(alpha_prime, 1.0 / (double)w); gsl_vector_scale(Utu, 1.0 / (double)w); if (a_mode == 13) { pheno_mean /= (double)w; } gsl_vector *alpha = gsl_vector_alloc(ns_test); gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0, alpha); WriteParam(beta_g, alpha, w); gsl_vector_free(alpha); gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime); WriteBV(alpha_prime); gsl_vector_free(alpha_prime); gsl_vector_free(Utu); delete[] p_gamma; beta_g.clear(); return; } void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, const gsl_vector *eval, const double lambda) { gsl_vector *beta = gsl_vector_alloc(UtX->size2); gsl_vector *H_eval = gsl_vector_alloc(Uty->size); gsl_vector *bv = gsl_vector_alloc(Uty->size); gsl_vector_memcpy(H_eval, eval); gsl_vector_scale(H_eval, lambda); gsl_vector_add_constant(H_eval, 1.0); gsl_vector_memcpy(bv, Uty); gsl_vector_div(bv, H_eval); gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta); gsl_vector_add_constant(H_eval, -1.0); gsl_vector_mul(H_eval, bv); gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv); WriteParam(beta); WriteBV(bv); gsl_vector_free(H_eval); gsl_vector_free(beta); gsl_vector_free(bv); return; } // Below fits MCMC for rho=1. void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) { time_t time_start = clock(); gsl_matrix_const_view X_sub = gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size); gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size); lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix); gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector); time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); return; } void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old, const gsl_matrix *XtX_old, const gsl_vector *Xty_old, const gsl_vector *y, const vector &rank_old, const vector &rank_new, gsl_matrix *X_new, gsl_matrix *XtX_new, gsl_vector *Xty_new) { double d; // rank_old and rank_new are sorted already inside PorposeGamma // calculate vectors rank_remove and rank_add. // make sure that v_size is larger than repeat. size_t v_size = 20; vector rank_remove(v_size), rank_add(v_size), rank_union(s_max + v_size); vector::iterator it; it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(), rank_new.end(), rank_remove.begin()); rank_remove.resize(it - rank_remove.begin()); it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(), rank_old.end(), rank_add.begin()); rank_add.resize(it - rank_add.begin()); it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(), rank_old.end(), rank_union.begin()); rank_union.resize(it - rank_union.begin()); // Map rank_remove and rank_add. map mapRank2in_remove, mapRank2in_add; for (size_t i = 0; i < rank_remove.size(); i++) { mapRank2in_remove[rank_remove[i]] = 1; } for (size_t i = 0; i < rank_add.size(); i++) { mapRank2in_add[rank_add[i]] = 1; } // Obtain the subset of matrix/vector. gsl_matrix_const_view Xold_sub = gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size()); gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix( XtX_old, 0, 0, rank_old.size(), rank_old.size()); gsl_vector_const_view Xtyold_sub = gsl_vector_const_subvector(Xty_old, 0, rank_old.size()); gsl_matrix_view Xnew_sub = gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size()); gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size()); gsl_vector_view Xtynew_sub = gsl_vector_subvector(Xty_new, 0, rank_new.size()); // Get X_new and calculate XtX_new. if (rank_remove.size() == 0 && rank_add.size() == 0) { gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix); gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix); gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector); } else { size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag; if (rank_add.size() == 0) { i_old = 0; i_new = 0; for (size_t i = 0; i < rank_union.size(); i++) { if (mapRank2in_remove.count(rank_old[i_old]) != 0) { i_old++; continue; } gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new); gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old); gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector); d = gsl_vector_get(Xty_old, i_old); gsl_vector_set(Xty_new, i_new, d); j_old = i_old; j_new = i_new; for (size_t j = i; j < rank_union.size(); j++) { if (mapRank2in_remove.count(rank_old[j_old]) != 0) { j_old++; continue; } d = gsl_matrix_get(XtX_old, i_old, j_old); gsl_matrix_set(XtX_new, i_new, j_new, d); if (i_new != j_new) { gsl_matrix_set(XtX_new, j_new, i_new, d); } j_old++; j_new++; } i_old++; i_new++; } } else { gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size()); gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2); gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2); gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2); // Get X_add. SetXgamma(X_add, X, rank_add); // Get t(X_add)X_add and t(X_add)X_temp. clock_t time_start = clock(); // Somehow the lapack_dgemm does not work here. gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao); gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add); time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // Save to X_new, XtX_new and Xty_new. i_old = 0; i_new = 0; i_add = 0; for (size_t i = 0; i < rank_union.size(); i++) { if (mapRank2in_remove.count(rank_old[i_old]) != 0) { i_old++; continue; } if (mapRank2in_add.count(rank_new[i_new]) != 0) { i_flag = 1; } else { i_flag = 0; } gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new); if (i_flag == 1) { gsl_vector_view Xcopy_col = gsl_matrix_column(X_add, i_add); gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector); } else { gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old); gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector); } if (i_flag == 1) { d = gsl_vector_get(Xty_add, i_add); } else { d = gsl_vector_get(Xty_old, i_old); } gsl_vector_set(Xty_new, i_new, d); j_old = i_old; j_new = i_new; j_add = i_add; for (size_t j = i; j < rank_union.size(); j++) { if (mapRank2in_remove.count(rank_old[j_old]) != 0) { j_old++; continue; } if (mapRank2in_add.count(rank_new[j_new]) != 0) { j_flag = 1; } else { j_flag = 0; } if (i_flag == 1 && j_flag == 1) { d = gsl_matrix_get(XtX_aa, i_add, j_add); } else if (i_flag == 1) { d = gsl_matrix_get(XtX_ao, i_add, j_old); } else if (j_flag == 1) { d = gsl_matrix_get(XtX_ao, j_add, i_old); } else { d = gsl_matrix_get(XtX_old, i_old, j_old); } gsl_matrix_set(XtX_new, i_new, j_new, d); if (i_new != j_new) { gsl_matrix_set(XtX_new, j_new, i_new, d); } j_new++; if (j_flag == 1) { j_add++; } else { j_old++; } } i_new++; if (i_flag == 1) { i_add++; } else { i_old++; } } gsl_matrix_free(X_add); gsl_matrix_free(XtX_aa); gsl_matrix_free(XtX_ao); gsl_vector_free(Xty_add); } } rank_remove.clear(); rank_add.clear(); rank_union.clear(); mapRank2in_remove.clear(); mapRank2in_add.clear(); return; } double BSLMM::CalcPosterior(const double yty, class HYPBSLMM &cHyp) { double logpost = 0.0; // For quantitative traits, calculate pve and pge. // Pve and pge for case/control data are calculted in CalcCC_PVEnZ. if (a_mode == 11) { cHyp.pve = 0.0; cHyp.pge = 1.0; } // Calculate likelihood. if (a_mode == 11) { logpost -= 0.5 * (double)ni_test * log(yty); } else { logpost -= 0.5 * yty; } logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp + ((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp)); return logpost; } double BSLMM::CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp) { double sigma_a2 = cHyp.h / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test); double logpost = 0.0; double d, P_yy = yty, logdet_O = 0.0; gsl_matrix_const_view Xgamma_sub = gsl_matrix_const_submatrix(Xgamma, 0, 0, Xgamma->size1, s_size); gsl_matrix_const_view XtX_sub = gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size); gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size); gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size); gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size); gsl_vector *beta_hat = gsl_vector_alloc(s_size); gsl_vector *Xty_temp = gsl_vector_alloc(s_size); gsl_vector_memcpy(Xty_temp, &Xty_sub.vector); // Calculate Omega. gsl_matrix_memcpy(Omega, &XtX_sub.matrix); gsl_matrix_scale(Omega, sigma_a2); gsl_matrix_set_identity(M_temp); gsl_matrix_add(Omega, M_temp); // Calculate beta_hat. logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat); gsl_vector_scale(beta_hat, sigma_a2); gsl_blas_ddot(Xty_temp, beta_hat, &d); P_yy -= d; // Sample tau. double tau = 1.0; if (a_mode == 11) { tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy); } // Sample beta. for (size_t i = 0; i < s_size; i++) { d = gsl_ran_gaussian(gsl_r, 1); gsl_vector_set(beta, i, d); } gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size); gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, &beta_sub.vector); // This computes inv(L^T(Omega)) %*% beta. gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau)); gsl_vector_add(&beta_sub.vector, beta_hat); gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, Xb); // For quantitative traits, calculate pve and pge. if (a_mode == 11) { gsl_blas_ddot(Xb, Xb, &d); cHyp.pve = d / (double)ni_test; cHyp.pve /= cHyp.pve + 1.0 / tau; cHyp.pge = 1.0; } logpost = -0.5 * logdet_O; if (a_mode == 11) { logpost -= 0.5 * (double)ni_test * log(P_yy); } else { logpost -= 0.5 * P_yy; } logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp + ((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp)); gsl_matrix_free(Omega); gsl_matrix_free(M_temp); gsl_vector_free(beta_hat); gsl_vector_free(Xty_temp); return logpost; } // Calculate pve and pge, and calculate z_hat for case-control data. void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) { gsl_vector_set_zero(z_hat); cHyp.pve = 0.0; cHyp.pge = 1.0; return; } // Calculate pve and pge, and calculate z_hat for case-control data. void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat, class HYPBSLMM &cHyp) { double d; gsl_blas_ddot(Xb, Xb, &d); cHyp.pve = d / (double)ni_test; cHyp.pve /= cHyp.pve + 1.0; cHyp.pge = 1.0; gsl_vector_memcpy(z_hat, Xb); return; } // If a_mode==13, then run probit model. void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) { clock_t time_start; double time_set = 0, time_post = 0; class HYPBSLMM cHyp_old, cHyp_new; gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6); gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max); gsl_vector *Xb_new = gsl_vector_alloc(ni_test); gsl_vector *Xb_old = gsl_vector_alloc(ni_test); gsl_vector *z_hat = gsl_vector_alloc(ni_test); gsl_vector *z = gsl_vector_alloc(ni_test); gsl_matrix *Xgamma_old = gsl_matrix_alloc(ni_test, s_max); gsl_matrix *XtX_old = gsl_matrix_alloc(s_max, s_max); gsl_vector *Xtz_old = gsl_vector_alloc(s_max); gsl_vector *beta_old = gsl_vector_alloc(s_max); gsl_matrix *Xgamma_new = gsl_matrix_alloc(ni_test, s_max); gsl_matrix *XtX_new = gsl_matrix_alloc(s_max, s_max); gsl_vector *Xtz_new = gsl_vector_alloc(s_max); gsl_vector *beta_new = gsl_vector_alloc(s_max); double ztz = 0.0; gsl_vector_memcpy(z, y); // For quantitative traits, y is centered already in // gemma.cpp, but just in case. double mean_z = CenterVector(z); gsl_blas_ddot(z, z, &ztz); double logPost_new, logPost_old; double logMHratio; gsl_matrix_set_zero(Result_gamma); if (a_mode == 13) { pheno_mean = 0.0; } vector> beta_g; for (size_t i = 0; i < ns_test; i++) { beta_g.push_back(make_pair(0.0, 0.0)); } vector rank_new, rank_old; vector> pos_loglr; time_start = clock(); MatrixCalcLmLR(X, z, pos_loglr); time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr); for (size_t i = 0; i < ns_test; ++i) { mapRank2pos[i] = pos_loglr[i].first; } // Calculate proposal distribution for gamma (unnormalized), // and set up gsl_r and gsl_t. gsl_rng_env_setup(); const gsl_rng_type *gslType; gslType = gsl_rng_default; if (randseed < 0) { time_t rawtime; time(&rawtime); tm *ptm = gmtime(&rawtime); randseed = (unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec); } gsl_r = gsl_rng_alloc(gslType); gsl_rng_set(gsl_r, randseed); double *p_gamma = new double[ns_test]; CalcPgamma(p_gamma); gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma); // Initial parameters. InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr); cHyp_initial = cHyp_old; if (cHyp_old.n_gamma == 0) { logPost_old = CalcPosterior(ztz, cHyp_old); } else { SetXgamma(Xgamma_old, X, rank_old); CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old); logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), Xb_old, beta_old, cHyp_old); } // Calculate centered z_hat, and pve. if (a_mode == 13) { if (cHyp_old.n_gamma == 0) { CalcCC_PVEnZ(z_hat, cHyp_old); } else { CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old); } } // Start MCMC. int accept; size_t total_step = w_step + s_step; size_t w = 0, w_col, pos; size_t repeat = 0; for (size_t t = 0; t < total_step; ++t) { if (t % d_pace == 0 || t == total_step - 1) { ProgressBar("Running MCMC ", t, total_step - 1, (double)n_accept / (double)(t * n_mh + 1)); } if (a_mode == 13) { SampleZ(y, z_hat, z); mean_z = CenterVector(z); gsl_blas_ddot(z, z, &ztz); // First proposal. if (cHyp_old.n_gamma == 0) { logPost_old = CalcPosterior(ztz, cHyp_old); } else { gsl_matrix_view Xold_sub = gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size()); gsl_vector_view Xtz_sub = gsl_vector_subvector(Xtz_old, 0, rank_old.size()); gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0, &Xtz_sub.vector); logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(), Xb_old, beta_old, cHyp_old); } } // M-H steps. for (size_t i = 0; i < n_mh; ++i) { if (gsl_rng_uniform(gsl_r) < 0.33) { repeat = 1 + gsl_rng_uniform_int(gsl_r, 20); } else { repeat = 1; } logMHratio = 0.0; logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat); logMHratio += ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat); logMHratio += ProposePi(cHyp_old, cHyp_new, repeat); if (cHyp_new.n_gamma == 0) { logPost_new = CalcPosterior(ztz, cHyp_new); } else { // This makes sure that rank_old.size() == // rank_remove.size() does not happen. if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) { time_start = clock(); SetXgamma(Xgamma_new, X, rank_new); CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new); time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); } else { time_start = clock(); SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new, Xgamma_new, XtX_new, Xtz_new); time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); } time_start = clock(); logPost_new = CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(), Xb_new, beta_new, cHyp_new); time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); } logMHratio += logPost_new - logPost_old; if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) { accept = 1; n_accept++; } else { accept = 0; } if (accept == 1) { logPost_old = logPost_new; cHyp_old = cHyp_new; gsl_vector_memcpy(Xb_old, Xb_new); rank_old.clear(); if (rank_new.size() != 0) { for (size_t i = 0; i < rank_new.size(); ++i) { rank_old.push_back(rank_new[i]); } gsl_matrix_view Xold_sub = gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size()); gsl_matrix_view XtXold_sub = gsl_matrix_submatrix( XtX_old, 0, 0, rank_new.size(), rank_new.size()); gsl_vector_view Xtzold_sub = gsl_vector_subvector(Xtz_old, 0, rank_new.size()); gsl_vector_view betaold_sub = gsl_vector_subvector(beta_old, 0, rank_new.size()); gsl_matrix_view Xnew_sub = gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size()); gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix( XtX_new, 0, 0, rank_new.size(), rank_new.size()); gsl_vector_view Xtznew_sub = gsl_vector_subvector(Xtz_new, 0, rank_new.size()); gsl_vector_view betanew_sub = gsl_vector_subvector(beta_new, 0, rank_new.size()); gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix); gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix); gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector); gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector); } } else { cHyp_new = cHyp_old; } } // Calculate z_hat, and pve. if (a_mode == 13) { if (cHyp_old.n_gamma == 0) { CalcCC_PVEnZ(z_hat, cHyp_old); } else { CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old); } // Sample mu and update z_hat. gsl_vector_sub(z, z_hat); mean_z += CenterVector(z); mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test)); gsl_vector_add_constant(z_hat, mean_z); } // Save data. if (t < w_step) { continue; } else { if (t % r_pace == 0) { w_col = w % w_pace; if (w_col == 0) { if (w == 0) { WriteResult(0, Result_hyp, Result_gamma, w_col); } else { WriteResult(1, Result_hyp, Result_gamma, w_col); gsl_matrix_set_zero(Result_hyp); gsl_matrix_set_zero(Result_gamma); } } gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h); gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve); gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho); gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge); gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp); gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma); for (size_t i = 0; i < cHyp_old.n_gamma; ++i) { pos = mapRank2pos[rank_old[i]] + 1; gsl_matrix_set(Result_gamma, w_col, i, pos); beta_g[pos - 1].first += gsl_vector_get(beta_old, i); beta_g[pos - 1].second += 1.0; } if (a_mode == 13) { pheno_mean += mean_z; } w++; } } } cout << endl; cout << "time on selecting Xgamma: " << time_set << endl; cout << "time on calculating posterior: " << time_post << endl; w_col = w % w_pace; WriteResult(1, Result_hyp, Result_gamma, w_col); gsl_vector *alpha = gsl_vector_alloc(ns_test); gsl_vector_set_zero(alpha); WriteParam(beta_g, alpha, w); gsl_vector_free(alpha); gsl_matrix_free(Result_hyp); gsl_matrix_free(Result_gamma); gsl_vector_free(z_hat); gsl_vector_free(z); gsl_vector_free(Xb_new); gsl_vector_free(Xb_old); gsl_matrix_free(Xgamma_old); gsl_matrix_free(XtX_old); gsl_vector_free(Xtz_old); gsl_vector_free(beta_old); gsl_matrix_free(Xgamma_new); gsl_matrix_free(XtX_new); gsl_vector_free(Xtz_new); gsl_vector_free(beta_new); delete[] p_gamma; beta_g.clear(); return; }