/*
Genome-wide Efficient Mixed Model Association (GEMMA)
Copyright (C) 2011-2017, Xiang Zhou
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_eigen.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_randist.h"
#include "gsl/gsl_roots.h"
#include "gsl/gsl_vector.h"
#include "bslmm.h"
#include "lapack.h"
#include "lm.h"
#include "lmm.h"
#include "mathfunc.h"
#include "param.h"
using namespace std;
void BSLMM::CopyFromParam(PARAM &cPar) {
a_mode = cPar.a_mode;
d_pace = cPar.d_pace;
file_bfile = cPar.file_bfile;
file_geno = cPar.file_geno;
file_out = cPar.file_out;
path_out = cPar.path_out;
l_min = cPar.h_min;
l_max = cPar.h_max;
n_region = cPar.n_region;
pve_null = cPar.pve_null;
pheno_mean = cPar.pheno_mean;
time_UtZ = 0.0;
time_Omega = 0.0;
n_accept = 0;
h_min = cPar.h_min;
h_max = cPar.h_max;
h_scale = cPar.h_scale;
rho_min = cPar.rho_min;
rho_max = cPar.rho_max;
rho_scale = cPar.rho_scale;
logp_min = cPar.logp_min;
logp_max = cPar.logp_max;
logp_scale = cPar.logp_scale;
s_min = cPar.s_min;
s_max = cPar.s_max;
w_step = cPar.w_step;
s_step = cPar.s_step;
r_pace = cPar.r_pace;
w_pace = cPar.w_pace;
n_mh = cPar.n_mh;
geo_mean = cPar.geo_mean;
randseed = cPar.randseed;
trace_G = cPar.trace_G;
ni_total = cPar.ni_total;
ns_total = cPar.ns_total;
ni_test = cPar.ni_test;
ns_test = cPar.ns_test;
n_cvt = cPar.n_cvt;
indicator_idv = cPar.indicator_idv;
indicator_snp = cPar.indicator_snp;
snpInfo = cPar.snpInfo;
return;
}
void BSLMM::CopyToParam(PARAM &cPar) {
cPar.time_UtZ = time_UtZ;
cPar.time_Omega = time_Omega;
cPar.time_Proposal = time_Proposal;
cPar.cHyp_initial = cHyp_initial;
cPar.n_accept = n_accept;
cPar.pheno_mean = pheno_mean;
cPar.randseed = randseed;
return;
}
void BSLMM::WriteBV(const gsl_vector *bv) {
string file_str;
file_str = path_out + "/" + file_out;
file_str += ".bv.txt";
ofstream outfile(file_str.c_str(), ofstream::out);
if (!outfile) {
cout << "error writing file: " << file_str.c_str() << endl;
return;
}
size_t t = 0;
for (size_t i = 0; i < ni_total; ++i) {
if (indicator_idv[i] == 0) {
outfile << "NA" << endl;
} else {
outfile << scientific << setprecision(6) << gsl_vector_get(bv, t) << endl;
t++;
}
}
outfile.clear();
outfile.close();
return;
}
void BSLMM::WriteParam(vector> &beta_g,
const gsl_vector *alpha, const size_t w) {
string file_str;
file_str = path_out + "/" + file_out;
file_str += ".param.txt";
ofstream outfile(file_str.c_str(), ofstream::out);
if (!outfile) {
cout << "error writing file: " << file_str.c_str() << endl;
return;
}
outfile << "chr"
<< "\t"
<< "rs"
<< "\t"
<< "ps"
<< "\t"
<< "n_miss"
<< "\t"
<< "alpha"
<< "\t"
<< "beta"
<< "\t"
<< "gamma" << endl;
size_t t = 0;
for (size_t i = 0; i < ns_total; ++i) {
if (indicator_snp[i] == 0) {
continue;
}
outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
<< snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
<< "\t";
if (beta_g[t].second != 0) {
outfile << beta_g[t].first / beta_g[t].second << "\t"
<< beta_g[t].second / (double)w << endl;
} else {
outfile << 0.0 << "\t" << 0.0 << endl;
}
t++;
}
outfile.clear();
outfile.close();
return;
}
void BSLMM::WriteParam(const gsl_vector *alpha) {
string file_str;
file_str = path_out + "/" + file_out;
file_str += ".param.txt";
ofstream outfile(file_str.c_str(), ofstream::out);
if (!outfile) {
cout << "error writing file: " << file_str.c_str() << endl;
return;
}
outfile << "chr"
<< "\t"
<< "rs"
<< "\t"
<< "ps"
<< "\t"
<< "n_miss"
<< "\t"
<< "alpha"
<< "\t"
<< "beta"
<< "\t"
<< "gamma" << endl;
size_t t = 0;
for (size_t i = 0; i < ns_total; ++i) {
if (indicator_snp[i] == 0) {
continue;
}
outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
<< snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t";
outfile << scientific << setprecision(6) << gsl_vector_get(alpha, t)
<< "\t";
outfile << 0.0 << "\t" << 0.0 << endl;
t++;
}
outfile.clear();
outfile.close();
return;
}
void BSLMM::WriteResult(const int flag, const gsl_matrix *Result_hyp,
const gsl_matrix *Result_gamma, const size_t w_col) {
string file_gamma, file_hyp;
file_gamma = path_out + "/" + file_out;
file_gamma += ".gamma.txt";
file_hyp = path_out + "/" + file_out;
file_hyp += ".hyp.txt";
ofstream outfile_gamma, outfile_hyp;
if (flag == 0) {
outfile_gamma.open(file_gamma.c_str(), ofstream::out);
outfile_hyp.open(file_hyp.c_str(), ofstream::out);
if (!outfile_gamma) {
cout << "error writing file: " << file_gamma << endl;
return;
}
if (!outfile_hyp) {
cout << "error writing file: " << file_hyp << endl;
return;
}
outfile_hyp << "h \t pve \t rho \t pge \t pi \t n_gamma" << endl;
for (size_t i = 0; i < s_max; ++i) {
outfile_gamma << "s" << i << "\t";
}
outfile_gamma << endl;
} else {
outfile_gamma.open(file_gamma.c_str(), ofstream::app);
outfile_hyp.open(file_hyp.c_str(), ofstream::app);
if (!outfile_gamma) {
cout << "error writing file: " << file_gamma << endl;
return;
}
if (!outfile_hyp) {
cout << "error writing file: " << file_hyp << endl;
return;
}
size_t w;
if (w_col == 0) {
w = w_pace;
} else {
w = w_col;
}
for (size_t i = 0; i < w; ++i) {
outfile_hyp << scientific;
for (size_t j = 0; j < 4; ++j) {
outfile_hyp << setprecision(6) << gsl_matrix_get(Result_hyp, i, j)
<< "\t";
}
outfile_hyp << setprecision(6) << exp(gsl_matrix_get(Result_hyp, i, 4))
<< "\t";
outfile_hyp << (int)gsl_matrix_get(Result_hyp, i, 5) << "\t";
outfile_hyp << endl;
}
for (size_t i = 0; i < w; ++i) {
for (size_t j = 0; j < s_max; ++j) {
outfile_gamma << (int)gsl_matrix_get(Result_gamma, i, j) << "\t";
}
outfile_gamma << endl;
}
}
outfile_hyp.close();
outfile_hyp.clear();
outfile_gamma.close();
outfile_gamma.clear();
return;
}
void BSLMM::CalcPgamma(double *p_gamma) {
double p, s = 0.0;
for (size_t i = 0; i < ns_test; ++i) {
p = 0.7 * gsl_ran_geometric_pdf(i + 1, 1.0 / geo_mean) +
0.3 / (double)ns_test;
p_gamma[i] = p;
s += p;
}
for (size_t i = 0; i < ns_test; ++i) {
p = p_gamma[i];
p_gamma[i] = p / s;
}
return;
}
void BSLMM::SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X,
vector &rank) {
size_t pos;
for (size_t i = 0; i < rank.size(); ++i) {
pos = mapRank2pos[rank[i]];
gsl_vector_view Xgamma_col = gsl_matrix_column(Xgamma, i);
gsl_vector_const_view X_col = gsl_matrix_const_column(X, pos);
gsl_vector_memcpy(&Xgamma_col.vector, &X_col.vector);
}
return;
}
double BSLMM::CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
const double sigma_a2) {
double pve, var_y;
gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
gsl_vector *Xty = gsl_vector_alloc(UtXgamma->size2);
gsl_vector *OiXty = gsl_vector_alloc(UtXgamma->size2);
gsl_matrix_set_identity(Omega);
gsl_matrix_scale(Omega, 1.0 / sigma_a2);
lapack_dgemm((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega);
gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty);
CholeskySolve(Omega, Xty, OiXty);
gsl_blas_ddot(Xty, OiXty, &pve);
gsl_blas_ddot(Uty, Uty, &var_y);
pve /= var_y;
gsl_matrix_free(Omega);
gsl_vector_free(Xty);
gsl_vector_free(OiXty);
return pve;
}
void BSLMM::InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty,
vector &rank, class HYPBSLMM &cHyp,
vector> &pos_loglr) {
double q_genome = gsl_cdf_chisq_Qinv(0.05 / (double)ns_test, 1);
cHyp.n_gamma = 0;
for (size_t i = 0; i < pos_loglr.size(); ++i) {
if (2.0 * pos_loglr[i].second > q_genome) {
cHyp.n_gamma++;
}
}
if (cHyp.n_gamma < 10) {
cHyp.n_gamma = 10;
}
if (cHyp.n_gamma > s_max) {
cHyp.n_gamma = s_max;
}
if (cHyp.n_gamma < s_min) {
cHyp.n_gamma = s_min;
}
rank.clear();
for (size_t i = 0; i < cHyp.n_gamma; ++i) {
rank.push_back(i);
}
cHyp.logp = log((double)cHyp.n_gamma / (double)ns_test);
cHyp.h = pve_null;
if (cHyp.logp == 0) {
cHyp.logp = -0.000001;
}
if (cHyp.h == 0) {
cHyp.h = 0.1;
}
gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp.n_gamma);
SetXgamma(UtXgamma, UtX, rank);
double sigma_a2;
if (trace_G != 0) {
sigma_a2 = cHyp.h * 1.0 /
(trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
} else {
sigma_a2 = cHyp.h * 1.0 / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
}
if (sigma_a2 == 0) {
sigma_a2 = 0.025;
}
cHyp.rho = CalcPveLM(UtXgamma, Uty, sigma_a2) / cHyp.h;
gsl_matrix_free(UtXgamma);
if (cHyp.rho > 1.0) {
cHyp.rho = 1.0;
}
if (cHyp.h < h_min) {
cHyp.h = h_min;
}
if (cHyp.h > h_max) {
cHyp.h = h_max;
}
if (cHyp.rho < rho_min) {
cHyp.rho = rho_min;
}
if (cHyp.rho > rho_max) {
cHyp.rho = rho_max;
}
if (cHyp.logp < logp_min) {
cHyp.logp = logp_min;
}
if (cHyp.logp > logp_max) {
cHyp.logp = logp_max;
}
cout << "initial value of h = " << cHyp.h << endl;
cout << "initial value of rho = " << cHyp.rho << endl;
cout << "initial value of pi = " << exp(cHyp.logp) << endl;
cout << "initial value of |gamma| = " << cHyp.n_gamma << endl;
return;
}
double BSLMM::CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval,
gsl_vector *Utu, gsl_vector *alpha_prime,
class HYPBSLMM &cHyp) {
double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
gsl_vector *Utu_rand = gsl_vector_alloc(Uty->size);
gsl_vector *weight_Hi = gsl_vector_alloc(Uty->size);
double logpost = 0.0;
double d, ds, uy, Hi_yy = 0, logdet_H = 0.0;
for (size_t i = 0; i < ni_test; ++i) {
d = gsl_vector_get(K_eval, i) * sigma_b2;
ds = d / (d + 1.0);
d = 1.0 / (d + 1.0);
gsl_vector_set(weight_Hi, i, d);
logdet_H -= log(d);
uy = gsl_vector_get(Uty, i);
Hi_yy += d * uy * uy;
gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
}
// Sample tau.
double tau = 1.0;
if (a_mode == 11) {
tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / Hi_yy);
}
// Sample alpha.
gsl_vector_memcpy(alpha_prime, Uty);
gsl_vector_mul(alpha_prime, weight_Hi);
gsl_vector_scale(alpha_prime, sigma_b2);
// Sample u.
gsl_vector_memcpy(Utu, alpha_prime);
gsl_vector_mul(Utu, K_eval);
if (a_mode == 11) {
gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
}
gsl_vector_add(Utu, Utu_rand);
// For quantitative traits, calculate pve and ppe.
if (a_mode == 11) {
gsl_blas_ddot(Utu, Utu, &d);
cHyp.pve = d / (double)ni_test;
cHyp.pve /= cHyp.pve + 1.0 / tau;
cHyp.pge = 0.0;
}
// Calculate likelihood.
logpost = -0.5 * logdet_H;
if (a_mode == 11) {
logpost -= 0.5 * (double)ni_test * log(Hi_yy);
} else {
logpost -= 0.5 * Hi_yy;
}
logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
gsl_vector_free(Utu_rand);
gsl_vector_free(weight_Hi);
return logpost;
}
double BSLMM::CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty,
const gsl_vector *K_eval, gsl_vector *UtXb,
gsl_vector *Utu, gsl_vector *alpha_prime,
gsl_vector *beta, class HYPBSLMM &cHyp) {
clock_t time_start;
double sigma_a2 = cHyp.h * cHyp.rho /
(trace_G * (1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
double sigma_b2 = cHyp.h * (1.0 - cHyp.rho) / (trace_G * (1 - cHyp.h));
double logpost = 0.0;
double d, ds, uy, P_yy = 0, logdet_O = 0.0, logdet_H = 0.0;
gsl_matrix *UtXgamma_eval =
gsl_matrix_alloc(UtXgamma->size1, UtXgamma->size2);
gsl_matrix *Omega = gsl_matrix_alloc(UtXgamma->size2, UtXgamma->size2);
gsl_vector *XtHiy = gsl_vector_alloc(UtXgamma->size2);
gsl_vector *beta_hat = gsl_vector_alloc(UtXgamma->size2);
gsl_vector *Utu_rand = gsl_vector_alloc(UtXgamma->size1);
gsl_vector *weight_Hi = gsl_vector_alloc(UtXgamma->size1);
gsl_matrix_memcpy(UtXgamma_eval, UtXgamma);
logdet_H = 0.0;
P_yy = 0.0;
for (size_t i = 0; i < ni_test; ++i) {
gsl_vector_view UtXgamma_row = gsl_matrix_row(UtXgamma_eval, i);
d = gsl_vector_get(K_eval, i) * sigma_b2;
ds = d / (d + 1.0);
d = 1.0 / (d + 1.0);
gsl_vector_set(weight_Hi, i, d);
logdet_H -= log(d);
uy = gsl_vector_get(Uty, i);
P_yy += d * uy * uy;
gsl_vector_scale(&UtXgamma_row.vector, d);
gsl_vector_set(Utu_rand, i, gsl_ran_gaussian(gsl_r, 1) * sqrt(ds));
}
// Calculate Omega.
gsl_matrix_set_identity(Omega);
time_start = clock();
lapack_dgemm((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0,
Omega);
time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Calculate beta_hat.
gsl_blas_dgemv(CblasTrans, 1.0, UtXgamma_eval, Uty, 0.0, XtHiy);
logdet_O = CholeskySolve(Omega, XtHiy, beta_hat);
gsl_vector_scale(beta_hat, sigma_a2);
gsl_blas_ddot(XtHiy, beta_hat, &d);
P_yy -= d;
// Sample tau.
double tau = 1.0;
if (a_mode == 11) {
tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
}
// Sample beta.
for (size_t i = 0; i < beta->size; i++) {
d = gsl_ran_gaussian(gsl_r, 1);
gsl_vector_set(beta, i, d);
}
gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, beta);
// This computes inv(L^T(Omega)) %*% beta.
gsl_vector_scale(beta, sqrt(sigma_a2 / tau));
gsl_vector_add(beta, beta_hat);
gsl_blas_dgemv(CblasNoTrans, 1.0, UtXgamma, beta, 0.0, UtXb);
// Sample alpha.
gsl_vector_memcpy(alpha_prime, Uty);
gsl_vector_sub(alpha_prime, UtXb);
gsl_vector_mul(alpha_prime, weight_Hi);
gsl_vector_scale(alpha_prime, sigma_b2);
// Sample u.
gsl_vector_memcpy(Utu, alpha_prime);
gsl_vector_mul(Utu, K_eval);
if (a_mode == 11) {
gsl_vector_scale(Utu_rand, sqrt(1.0 / tau));
}
gsl_vector_add(Utu, Utu_rand);
// For quantitative traits, calculate pve and pge.
if (a_mode == 11) {
gsl_blas_ddot(UtXb, UtXb, &d);
cHyp.pge = d / (double)ni_test;
gsl_blas_ddot(Utu, Utu, &d);
cHyp.pve = cHyp.pge + d / (double)ni_test;
if (cHyp.pve == 0) {
cHyp.pge = 0.0;
} else {
cHyp.pge /= cHyp.pve;
}
cHyp.pve /= cHyp.pve + 1.0 / tau;
}
gsl_matrix_free(UtXgamma_eval);
gsl_matrix_free(Omega);
gsl_vector_free(XtHiy);
gsl_vector_free(beta_hat);
gsl_vector_free(Utu_rand);
gsl_vector_free(weight_Hi);
logpost = -0.5 * logdet_H - 0.5 * logdet_O;
if (a_mode == 11) {
logpost -= 0.5 * (double)ni_test * log(P_yy);
} else {
logpost -= 0.5 * P_yy;
}
logpost +=
((double)cHyp.n_gamma - 1.0) * cHyp.logp +
((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
return logpost;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu,
gsl_vector *z_hat, class HYPBSLMM &cHyp) {
double d;
gsl_blas_ddot(Utu, Utu, &d);
cHyp.pve = d / (double)ni_test;
gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, z_hat);
cHyp.pve /= cHyp.pve + 1.0;
cHyp.pge = 0.0;
return;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
void BSLMM::CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb,
const gsl_vector *Utu, gsl_vector *z_hat,
class HYPBSLMM &cHyp) {
double d;
gsl_vector *UtXbU = gsl_vector_alloc(Utu->size);
gsl_blas_ddot(UtXb, UtXb, &d);
cHyp.pge = d / (double)ni_test;
gsl_blas_ddot(Utu, Utu, &d);
cHyp.pve = cHyp.pge + d / (double)ni_test;
gsl_vector_memcpy(UtXbU, Utu);
gsl_vector_add(UtXbU, UtXb);
gsl_blas_dgemv(CblasNoTrans, 1.0, U, UtXbU, 0.0, z_hat);
if (cHyp.pve == 0) {
cHyp.pge = 0.0;
} else {
cHyp.pge /= cHyp.pve;
}
cHyp.pve /= cHyp.pve + 1.0;
gsl_vector_free(UtXbU);
return;
}
void BSLMM::SampleZ(const gsl_vector *y, const gsl_vector *z_hat,
gsl_vector *z) {
double d1, d2, z_rand = 0.0;
for (size_t i = 0; i < z->size; ++i) {
d1 = gsl_vector_get(y, i);
d2 = gsl_vector_get(z_hat, i);
// y is centered for case control studies.
if (d1 <= 0.0) {
// Control, right truncated.
do {
z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
} while (z_rand > 0.0);
} else {
do {
z_rand = d2 + gsl_ran_gaussian(gsl_r, 1.0);
} while (z_rand < 0.0);
}
gsl_vector_set(z, i, z_rand);
}
return;
}
double BSLMM::ProposeHnRho(const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
double h = cHyp_old.h, rho = cHyp_old.rho;
double d_h = (h_max - h_min) * h_scale,
d_rho = (rho_max - rho_min) * rho_scale;
for (size_t i = 0; i < repeat; ++i) {
h = h + (gsl_rng_uniform(gsl_r) - 0.5) * d_h;
if (h < h_min) {
h = 2 * h_min - h;
}
if (h > h_max) {
h = 2 * h_max - h;
}
rho = rho + (gsl_rng_uniform(gsl_r) - 0.5) * d_rho;
if (rho < rho_min) {
rho = 2 * rho_min - rho;
}
if (rho > rho_max) {
rho = 2 * rho_max - rho;
}
}
cHyp_new.h = h;
cHyp_new.rho = rho;
return 0.0;
}
double BSLMM::ProposePi(const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
double logp_old = cHyp_old.logp, logp_new = cHyp_old.logp;
double log_ratio = 0.0;
double d_logp = min(0.1, (logp_max - logp_min) * logp_scale);
for (size_t i = 0; i < repeat; ++i) {
logp_new = logp_old + (gsl_rng_uniform(gsl_r) - 0.5) * d_logp;
if (logp_new < logp_min) {
logp_new = 2 * logp_min - logp_new;
}
if (logp_new > logp_max) {
logp_new = 2 * logp_max - logp_new;
}
log_ratio += logp_new - logp_old;
logp_old = logp_new;
}
cHyp_new.logp = logp_new;
return log_ratio;
}
bool comp_vec(size_t a, size_t b) { return (a < b); }
double BSLMM::ProposeGamma(const vector &rank_old,
vector &rank_new, const double *p_gamma,
const class HYPBSLMM &cHyp_old,
class HYPBSLMM &cHyp_new, const size_t &repeat) {
map mapRank2in;
size_t r;
double unif, logp = 0.0;
int flag_gamma;
size_t r_add, r_remove, col_id;
rank_new.clear();
if (cHyp_old.n_gamma != rank_old.size()) {
cout << "size wrong" << endl;
}
if (cHyp_old.n_gamma != 0) {
for (size_t i = 0; i < rank_old.size(); ++i) {
r = rank_old[i];
rank_new.push_back(r);
mapRank2in[r] = 1;
}
}
cHyp_new.n_gamma = cHyp_old.n_gamma;
for (size_t i = 0; i < repeat; ++i) {
unif = gsl_rng_uniform(gsl_r);
if (unif < 0.40 && cHyp_new.n_gamma < s_max) {
flag_gamma = 1;
} else if (unif >= 0.40 && unif < 0.80 && cHyp_new.n_gamma > s_min) {
flag_gamma = 2;
} else if (unif >= 0.80 && cHyp_new.n_gamma > 0 &&
cHyp_new.n_gamma < ns_test) {
flag_gamma = 3;
} else {
flag_gamma = 4;
}
if (flag_gamma == 1) {
// Add a SNP.
do {
r_add = gsl_ran_discrete(gsl_r, gsl_t);
} while (mapRank2in.count(r_add) != 0);
double prob_total = 1.0;
for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
r = rank_new[i];
prob_total -= p_gamma[r];
}
mapRank2in[r_add] = 1;
rank_new.push_back(r_add);
cHyp_new.n_gamma++;
logp += -log(p_gamma[r_add] / prob_total) - log((double)cHyp_new.n_gamma);
} else if (flag_gamma == 2) {
// Delete a SNP.
col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
r_remove = rank_new[col_id];
double prob_total = 1.0;
for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
r = rank_new[i];
prob_total -= p_gamma[r];
}
prob_total += p_gamma[r_remove];
mapRank2in.erase(r_remove);
rank_new.erase(rank_new.begin() + col_id);
logp +=
log(p_gamma[r_remove] / prob_total) + log((double)cHyp_new.n_gamma);
cHyp_new.n_gamma--;
} else if (flag_gamma == 3) {
// Switch a SNP.
col_id = gsl_rng_uniform_int(gsl_r, cHyp_new.n_gamma);
r_remove = rank_new[col_id];
// Be careful with the proposal.
do {
r_add = gsl_ran_discrete(gsl_r, gsl_t);
} while (mapRank2in.count(r_add) != 0);
double prob_total = 1.0;
for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
r = rank_new[i];
prob_total -= p_gamma[r];
}
logp += log(p_gamma[r_remove] /
(prob_total + p_gamma[r_remove] - p_gamma[r_add]));
logp -= log(p_gamma[r_add] / prob_total);
mapRank2in.erase(r_remove);
mapRank2in[r_add] = 1;
rank_new.erase(rank_new.begin() + col_id);
rank_new.push_back(r_add);
} else {
logp += 0;
} // Do not change.
}
stable_sort(rank_new.begin(), rank_new.end(), comp_vec);
mapRank2in.clear();
return logp;
}
bool comp_lr(pair a, pair b) {
return (a.second > b.second);
}
// If a_mode==13 then Uty==y.
void BSLMM::MCMC(const gsl_matrix *U, const gsl_matrix *UtX,
const gsl_vector *Uty, const gsl_vector *K_eval,
const gsl_vector *y) {
clock_t time_start;
class HYPBSLMM cHyp_old, cHyp_new;
gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
gsl_vector *alpha_prime = gsl_vector_alloc(ni_test);
gsl_vector *alpha_new = gsl_vector_alloc(ni_test);
gsl_vector *alpha_old = gsl_vector_alloc(ni_test);
gsl_vector *Utu = gsl_vector_alloc(ni_test);
gsl_vector *Utu_new = gsl_vector_alloc(ni_test);
gsl_vector *Utu_old = gsl_vector_alloc(ni_test);
gsl_vector *UtXb_new = gsl_vector_alloc(ni_test);
gsl_vector *UtXb_old = gsl_vector_alloc(ni_test);
gsl_vector *z_hat = gsl_vector_alloc(ni_test);
gsl_vector *z = gsl_vector_alloc(ni_test);
gsl_vector *Utz = gsl_vector_alloc(ni_test);
gsl_vector_memcpy(Utz, Uty);
double logPost_new, logPost_old;
double logMHratio;
double mean_z = 0.0;
gsl_matrix_set_zero(Result_gamma);
gsl_vector_set_zero(Utu);
gsl_vector_set_zero(alpha_prime);
if (a_mode == 13) {
pheno_mean = 0.0;
}
vector> beta_g;
for (size_t i = 0; i < ns_test; i++) {
beta_g.push_back(make_pair(0.0, 0.0));
}
vector rank_new, rank_old;
vector beta_new, beta_old;
vector> pos_loglr;
time_start = clock();
MatrixCalcLR(U, UtX, Utz, K_eval, l_min, l_max, n_region, pos_loglr);
time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
for (size_t i = 0; i < ns_test; ++i) {
mapRank2pos[i] = pos_loglr[i].first;
}
// Calculate proposal distribution for gamma (unnormalized),
// and set up gsl_r and gsl_t.
gsl_rng_env_setup();
const gsl_rng_type *gslType;
gslType = gsl_rng_default;
if (randseed < 0) {
time_t rawtime;
time(&rawtime);
tm *ptm = gmtime(&rawtime);
randseed =
(unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec);
}
gsl_r = gsl_rng_alloc(gslType);
gsl_rng_set(gsl_r, randseed);
double *p_gamma = new double[ns_test];
CalcPgamma(p_gamma);
gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
// Initial parameters.
InitialMCMC(UtX, Utz, rank_old, cHyp_old, pos_loglr);
cHyp_initial = cHyp_old;
if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
beta_old.clear();
for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
beta_old.push_back(0);
}
} else {
gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
SetXgamma(UtXgamma, UtX, rank_old);
logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
alpha_old, beta, cHyp_old);
beta_old.clear();
for (size_t i = 0; i < beta->size; ++i) {
beta_old.push_back(gsl_vector_get(beta, i));
}
gsl_matrix_free(UtXgamma);
gsl_vector_free(beta);
}
// Calculate centered z_hat, and pve.
if (a_mode == 13) {
time_start = clock();
if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
} else {
CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
}
time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
}
// Start MCMC.
int accept;
size_t total_step = w_step + s_step;
size_t w = 0, w_col, pos;
size_t repeat = 0;
for (size_t t = 0; t < total_step; ++t) {
if (t % d_pace == 0 || t == total_step - 1) {
ProgressBar("Running MCMC ", t, total_step - 1,
(double)n_accept / (double)(t * n_mh + 1));
}
if (a_mode == 13) {
SampleZ(y, z_hat, z);
mean_z = CenterVector(z);
time_start = clock();
gsl_blas_dgemv(CblasTrans, 1.0, U, z, 0.0, Utz);
time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// First proposal.
if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
logPost_old = CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old);
beta_old.clear();
for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
beta_old.push_back(0);
}
} else {
gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_old.n_gamma);
gsl_vector *beta = gsl_vector_alloc(cHyp_old.n_gamma);
SetXgamma(UtXgamma, UtX, rank_old);
logPost_old = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old,
alpha_old, beta, cHyp_old);
beta_old.clear();
for (size_t i = 0; i < beta->size; ++i) {
beta_old.push_back(gsl_vector_get(beta, i));
}
gsl_matrix_free(UtXgamma);
gsl_vector_free(beta);
}
}
// M-H steps.
for (size_t i = 0; i < n_mh; ++i) {
if (gsl_rng_uniform(gsl_r) < 0.33) {
repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
} else {
repeat = 1;
}
logMHratio = 0.0;
logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
logMHratio +=
ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
if (cHyp_new.n_gamma == 0 || cHyp_new.rho == 0) {
logPost_new = CalcPosterior(Utz, K_eval, Utu_new, alpha_new, cHyp_new);
beta_new.clear();
for (size_t i = 0; i < cHyp_new.n_gamma; ++i) {
beta_new.push_back(0);
}
} else {
gsl_matrix *UtXgamma = gsl_matrix_alloc(ni_test, cHyp_new.n_gamma);
gsl_vector *beta = gsl_vector_alloc(cHyp_new.n_gamma);
SetXgamma(UtXgamma, UtX, rank_new);
logPost_new = CalcPosterior(UtXgamma, Utz, K_eval, UtXb_new, Utu_new,
alpha_new, beta, cHyp_new);
beta_new.clear();
for (size_t i = 0; i < beta->size; ++i) {
beta_new.push_back(gsl_vector_get(beta, i));
}
gsl_matrix_free(UtXgamma);
gsl_vector_free(beta);
}
logMHratio += logPost_new - logPost_old;
if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
accept = 1;
n_accept++;
} else {
accept = 0;
}
if (accept == 1) {
logPost_old = logPost_new;
rank_old.clear();
beta_old.clear();
if (rank_new.size() != 0) {
for (size_t i = 0; i < rank_new.size(); ++i) {
rank_old.push_back(rank_new[i]);
beta_old.push_back(beta_new[i]);
}
}
cHyp_old = cHyp_new;
gsl_vector_memcpy(alpha_old, alpha_new);
gsl_vector_memcpy(UtXb_old, UtXb_new);
gsl_vector_memcpy(Utu_old, Utu_new);
} else {
cHyp_new = cHyp_old;
}
}
// Calculate z_hat, and pve.
if (a_mode == 13) {
time_start = clock();
if (cHyp_old.n_gamma == 0 || cHyp_old.rho == 0) {
CalcCC_PVEnZ(U, Utu_old, z_hat, cHyp_old);
} else {
CalcCC_PVEnZ(U, UtXb_old, Utu_old, z_hat, cHyp_old);
}
// Sample mu and update z_hat.
gsl_vector_sub(z, z_hat);
mean_z += CenterVector(z);
mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
gsl_vector_add_constant(z_hat, mean_z);
time_UtZ += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
}
// Save data.
if (t < w_step) {
continue;
} else {
if (t % r_pace == 0) {
w_col = w % w_pace;
if (w_col == 0) {
if (w == 0) {
WriteResult(0, Result_hyp, Result_gamma, w_col);
} else {
WriteResult(1, Result_hyp, Result_gamma, w_col);
gsl_matrix_set_zero(Result_hyp);
gsl_matrix_set_zero(Result_gamma);
}
}
gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
pos = mapRank2pos[rank_old[i]] + 1;
gsl_matrix_set(Result_gamma, w_col, i, pos);
beta_g[pos - 1].first += beta_old[i];
beta_g[pos - 1].second += 1.0;
}
gsl_vector_add(alpha_prime, alpha_old);
gsl_vector_add(Utu, Utu_old);
if (a_mode == 13) {
pheno_mean += mean_z;
}
w++;
}
}
}
cout << endl;
w_col = w % w_pace;
WriteResult(1, Result_hyp, Result_gamma, w_col);
gsl_matrix_free(Result_hyp);
gsl_matrix_free(Result_gamma);
gsl_vector_free(z_hat);
gsl_vector_free(z);
gsl_vector_free(Utz);
gsl_vector_free(UtXb_new);
gsl_vector_free(UtXb_old);
gsl_vector_free(alpha_new);
gsl_vector_free(alpha_old);
gsl_vector_free(Utu_new);
gsl_vector_free(Utu_old);
gsl_vector_scale(alpha_prime, 1.0 / (double)w);
gsl_vector_scale(Utu, 1.0 / (double)w);
if (a_mode == 13) {
pheno_mean /= (double)w;
}
gsl_vector *alpha = gsl_vector_alloc(ns_test);
gsl_blas_dgemv(CblasTrans, 1.0 / (double)ns_test, UtX, alpha_prime, 0.0,
alpha);
WriteParam(beta_g, alpha, w);
gsl_vector_free(alpha);
gsl_blas_dgemv(CblasNoTrans, 1.0, U, Utu, 0.0, alpha_prime);
WriteBV(alpha_prime);
gsl_vector_free(alpha_prime);
gsl_vector_free(Utu);
delete[] p_gamma;
beta_g.clear();
return;
}
void BSLMM::RidgeR(const gsl_matrix *U, const gsl_matrix *UtX,
const gsl_vector *Uty, const gsl_vector *eval,
const double lambda) {
gsl_vector *beta = gsl_vector_alloc(UtX->size2);
gsl_vector *H_eval = gsl_vector_alloc(Uty->size);
gsl_vector *bv = gsl_vector_alloc(Uty->size);
gsl_vector_memcpy(H_eval, eval);
gsl_vector_scale(H_eval, lambda);
gsl_vector_add_constant(H_eval, 1.0);
gsl_vector_memcpy(bv, Uty);
gsl_vector_div(bv, H_eval);
gsl_blas_dgemv(CblasTrans, lambda / (double)UtX->size2, UtX, bv, 0.0, beta);
gsl_vector_add_constant(H_eval, -1.0);
gsl_vector_mul(H_eval, bv);
gsl_blas_dgemv(CblasNoTrans, 1.0, U, H_eval, 0.0, bv);
WriteParam(beta);
WriteBV(bv);
gsl_vector_free(H_eval);
gsl_vector_free(beta);
gsl_vector_free(bv);
return;
}
// Below fits MCMC for rho=1.
void BSLMM::CalcXtX(const gsl_matrix *X, const gsl_vector *y,
const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) {
time_t time_start = clock();
gsl_matrix_const_view X_sub =
gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size);
gsl_matrix_view XtX_sub = gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size);
gsl_vector_view Xty_sub = gsl_vector_subvector(Xty, 0, s_size);
lapack_dgemm((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0,
&XtX_sub.matrix);
gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector);
time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
return;
}
void BSLMM::SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old,
const gsl_matrix *XtX_old, const gsl_vector *Xty_old,
const gsl_vector *y, const vector &rank_old,
const vector &rank_new, gsl_matrix *X_new,
gsl_matrix *XtX_new, gsl_vector *Xty_new) {
double d;
// rank_old and rank_new are sorted already inside PorposeGamma
// calculate vectors rank_remove and rank_add.
// make sure that v_size is larger than repeat.
size_t v_size = 20;
vector rank_remove(v_size), rank_add(v_size),
rank_union(s_max + v_size);
vector::iterator it;
it = set_difference(rank_old.begin(), rank_old.end(), rank_new.begin(),
rank_new.end(), rank_remove.begin());
rank_remove.resize(it - rank_remove.begin());
it = set_difference(rank_new.begin(), rank_new.end(), rank_old.begin(),
rank_old.end(), rank_add.begin());
rank_add.resize(it - rank_add.begin());
it = set_union(rank_new.begin(), rank_new.end(), rank_old.begin(),
rank_old.end(), rank_union.begin());
rank_union.resize(it - rank_union.begin());
// Map rank_remove and rank_add.
map mapRank2in_remove, mapRank2in_add;
for (size_t i = 0; i < rank_remove.size(); i++) {
mapRank2in_remove[rank_remove[i]] = 1;
}
for (size_t i = 0; i < rank_add.size(); i++) {
mapRank2in_add[rank_add[i]] = 1;
}
// Obtain the subset of matrix/vector.
gsl_matrix_const_view Xold_sub =
gsl_matrix_const_submatrix(X_old, 0, 0, X_old->size1, rank_old.size());
gsl_matrix_const_view XtXold_sub = gsl_matrix_const_submatrix(
XtX_old, 0, 0, rank_old.size(), rank_old.size());
gsl_vector_const_view Xtyold_sub =
gsl_vector_const_subvector(Xty_old, 0, rank_old.size());
gsl_matrix_view Xnew_sub =
gsl_matrix_submatrix(X_new, 0, 0, X_new->size1, rank_new.size());
gsl_matrix_view XtXnew_sub =
gsl_matrix_submatrix(XtX_new, 0, 0, rank_new.size(), rank_new.size());
gsl_vector_view Xtynew_sub =
gsl_vector_subvector(Xty_new, 0, rank_new.size());
// Get X_new and calculate XtX_new.
if (rank_remove.size() == 0 && rank_add.size() == 0) {
gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix);
gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix);
gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector);
} else {
size_t i_old, j_old, i_new, j_new, i_add, j_add, i_flag, j_flag;
if (rank_add.size() == 0) {
i_old = 0;
i_new = 0;
for (size_t i = 0; i < rank_union.size(); i++) {
if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
i_old++;
continue;
}
gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
gsl_vector_const_view Xcopy_col = gsl_matrix_const_column(X_old, i_old);
gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
d = gsl_vector_get(Xty_old, i_old);
gsl_vector_set(Xty_new, i_new, d);
j_old = i_old;
j_new = i_new;
for (size_t j = i; j < rank_union.size(); j++) {
if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
j_old++;
continue;
}
d = gsl_matrix_get(XtX_old, i_old, j_old);
gsl_matrix_set(XtX_new, i_new, j_new, d);
if (i_new != j_new) {
gsl_matrix_set(XtX_new, j_new, i_new, d);
}
j_old++;
j_new++;
}
i_old++;
i_new++;
}
} else {
gsl_matrix *X_add = gsl_matrix_alloc(X_old->size1, rank_add.size());
gsl_matrix *XtX_aa = gsl_matrix_alloc(X_add->size2, X_add->size2);
gsl_matrix *XtX_ao = gsl_matrix_alloc(X_add->size2, X_old->size2);
gsl_vector *Xty_add = gsl_vector_alloc(X_add->size2);
// Get X_add.
SetXgamma(X_add, X, rank_add);
// Get t(X_add)X_add and t(X_add)X_temp.
clock_t time_start = clock();
// Somehow the lapack_dgemm does not work here.
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao);
gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add);
time_Omega += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// Save to X_new, XtX_new and Xty_new.
i_old = 0;
i_new = 0;
i_add = 0;
for (size_t i = 0; i < rank_union.size(); i++) {
if (mapRank2in_remove.count(rank_old[i_old]) != 0) {
i_old++;
continue;
}
if (mapRank2in_add.count(rank_new[i_new]) != 0) {
i_flag = 1;
} else {
i_flag = 0;
}
gsl_vector_view Xnew_col = gsl_matrix_column(X_new, i_new);
if (i_flag == 1) {
gsl_vector_view Xcopy_col = gsl_matrix_column(X_add, i_add);
gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
} else {
gsl_vector_const_view Xcopy_col =
gsl_matrix_const_column(X_old, i_old);
gsl_vector_memcpy(&Xnew_col.vector, &Xcopy_col.vector);
}
if (i_flag == 1) {
d = gsl_vector_get(Xty_add, i_add);
} else {
d = gsl_vector_get(Xty_old, i_old);
}
gsl_vector_set(Xty_new, i_new, d);
j_old = i_old;
j_new = i_new;
j_add = i_add;
for (size_t j = i; j < rank_union.size(); j++) {
if (mapRank2in_remove.count(rank_old[j_old]) != 0) {
j_old++;
continue;
}
if (mapRank2in_add.count(rank_new[j_new]) != 0) {
j_flag = 1;
} else {
j_flag = 0;
}
if (i_flag == 1 && j_flag == 1) {
d = gsl_matrix_get(XtX_aa, i_add, j_add);
} else if (i_flag == 1) {
d = gsl_matrix_get(XtX_ao, i_add, j_old);
} else if (j_flag == 1) {
d = gsl_matrix_get(XtX_ao, j_add, i_old);
} else {
d = gsl_matrix_get(XtX_old, i_old, j_old);
}
gsl_matrix_set(XtX_new, i_new, j_new, d);
if (i_new != j_new) {
gsl_matrix_set(XtX_new, j_new, i_new, d);
}
j_new++;
if (j_flag == 1) {
j_add++;
} else {
j_old++;
}
}
i_new++;
if (i_flag == 1) {
i_add++;
} else {
i_old++;
}
}
gsl_matrix_free(X_add);
gsl_matrix_free(XtX_aa);
gsl_matrix_free(XtX_ao);
gsl_vector_free(Xty_add);
}
}
rank_remove.clear();
rank_add.clear();
rank_union.clear();
mapRank2in_remove.clear();
mapRank2in_add.clear();
return;
}
double BSLMM::CalcPosterior(const double yty, class HYPBSLMM &cHyp) {
double logpost = 0.0;
// For quantitative traits, calculate pve and pge.
// Pve and pge for case/control data are calculted in CalcCC_PVEnZ.
if (a_mode == 11) {
cHyp.pve = 0.0;
cHyp.pge = 1.0;
}
// Calculate likelihood.
if (a_mode == 11) {
logpost -= 0.5 * (double)ni_test * log(yty);
} else {
logpost -= 0.5 * yty;
}
logpost += ((double)cHyp.n_gamma - 1.0) * cHyp.logp +
((double)ns_test - (double)cHyp.n_gamma) * log(1 - exp(cHyp.logp));
return logpost;
}
double BSLMM::CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX,
const gsl_vector *Xty, const double yty,
const size_t s_size, gsl_vector *Xb,
gsl_vector *beta, class HYPBSLMM &cHyp) {
double sigma_a2 = cHyp.h / ((1 - cHyp.h) * exp(cHyp.logp) * (double)ns_test);
double logpost = 0.0;
double d, P_yy = yty, logdet_O = 0.0;
gsl_matrix_const_view Xgamma_sub =
gsl_matrix_const_submatrix(Xgamma, 0, 0, Xgamma->size1, s_size);
gsl_matrix_const_view XtX_sub =
gsl_matrix_const_submatrix(XtX, 0, 0, s_size, s_size);
gsl_vector_const_view Xty_sub = gsl_vector_const_subvector(Xty, 0, s_size);
gsl_matrix *Omega = gsl_matrix_alloc(s_size, s_size);
gsl_matrix *M_temp = gsl_matrix_alloc(s_size, s_size);
gsl_vector *beta_hat = gsl_vector_alloc(s_size);
gsl_vector *Xty_temp = gsl_vector_alloc(s_size);
gsl_vector_memcpy(Xty_temp, &Xty_sub.vector);
// Calculate Omega.
gsl_matrix_memcpy(Omega, &XtX_sub.matrix);
gsl_matrix_scale(Omega, sigma_a2);
gsl_matrix_set_identity(M_temp);
gsl_matrix_add(Omega, M_temp);
// Calculate beta_hat.
logdet_O = CholeskySolve(Omega, Xty_temp, beta_hat);
gsl_vector_scale(beta_hat, sigma_a2);
gsl_blas_ddot(Xty_temp, beta_hat, &d);
P_yy -= d;
// Sample tau.
double tau = 1.0;
if (a_mode == 11) {
tau = gsl_ran_gamma(gsl_r, (double)ni_test / 2.0, 2.0 / P_yy);
}
// Sample beta.
for (size_t i = 0; i < s_size; i++) {
d = gsl_ran_gaussian(gsl_r, 1);
gsl_vector_set(beta, i, d);
}
gsl_vector_view beta_sub = gsl_vector_subvector(beta, 0, s_size);
gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega,
&beta_sub.vector);
// This computes inv(L^T(Omega)) %*% beta.
gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2 / tau));
gsl_vector_add(&beta_sub.vector, beta_hat);
gsl_blas_dgemv(CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0,
Xb);
// For quantitative traits, calculate pve and pge.
if (a_mode == 11) {
gsl_blas_ddot(Xb, Xb, &d);
cHyp.pve = d / (double)ni_test;
cHyp.pve /= cHyp.pve + 1.0 / tau;
cHyp.pge = 1.0;
}
logpost = -0.5 * logdet_O;
if (a_mode == 11) {
logpost -= 0.5 * (double)ni_test * log(P_yy);
} else {
logpost -= 0.5 * P_yy;
}
logpost +=
((double)cHyp.n_gamma - 1.0) * cHyp.logp +
((double)ns_test - (double)cHyp.n_gamma) * log(1.0 - exp(cHyp.logp));
gsl_matrix_free(Omega);
gsl_matrix_free(M_temp);
gsl_vector_free(beta_hat);
gsl_vector_free(Xty_temp);
return logpost;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
void BSLMM::CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp) {
gsl_vector_set_zero(z_hat);
cHyp.pve = 0.0;
cHyp.pge = 1.0;
return;
}
// Calculate pve and pge, and calculate z_hat for case-control data.
void BSLMM::CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat,
class HYPBSLMM &cHyp) {
double d;
gsl_blas_ddot(Xb, Xb, &d);
cHyp.pve = d / (double)ni_test;
cHyp.pve /= cHyp.pve + 1.0;
cHyp.pge = 1.0;
gsl_vector_memcpy(z_hat, Xb);
return;
}
// If a_mode==13, then run probit model.
void BSLMM::MCMC(const gsl_matrix *X, const gsl_vector *y) {
clock_t time_start;
double time_set = 0, time_post = 0;
class HYPBSLMM cHyp_old, cHyp_new;
gsl_matrix *Result_hyp = gsl_matrix_alloc(w_pace, 6);
gsl_matrix *Result_gamma = gsl_matrix_alloc(w_pace, s_max);
gsl_vector *Xb_new = gsl_vector_alloc(ni_test);
gsl_vector *Xb_old = gsl_vector_alloc(ni_test);
gsl_vector *z_hat = gsl_vector_alloc(ni_test);
gsl_vector *z = gsl_vector_alloc(ni_test);
gsl_matrix *Xgamma_old = gsl_matrix_alloc(ni_test, s_max);
gsl_matrix *XtX_old = gsl_matrix_alloc(s_max, s_max);
gsl_vector *Xtz_old = gsl_vector_alloc(s_max);
gsl_vector *beta_old = gsl_vector_alloc(s_max);
gsl_matrix *Xgamma_new = gsl_matrix_alloc(ni_test, s_max);
gsl_matrix *XtX_new = gsl_matrix_alloc(s_max, s_max);
gsl_vector *Xtz_new = gsl_vector_alloc(s_max);
gsl_vector *beta_new = gsl_vector_alloc(s_max);
double ztz = 0.0;
gsl_vector_memcpy(z, y);
// For quantitative traits, y is centered already in
// gemma.cpp, but just in case.
double mean_z = CenterVector(z);
gsl_blas_ddot(z, z, &ztz);
double logPost_new, logPost_old;
double logMHratio;
gsl_matrix_set_zero(Result_gamma);
if (a_mode == 13) {
pheno_mean = 0.0;
}
vector> beta_g;
for (size_t i = 0; i < ns_test; i++) {
beta_g.push_back(make_pair(0.0, 0.0));
}
vector rank_new, rank_old;
vector> pos_loglr;
time_start = clock();
MatrixCalcLmLR(X, z, pos_loglr);
time_Proposal = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
stable_sort(pos_loglr.begin(), pos_loglr.end(), comp_lr);
for (size_t i = 0; i < ns_test; ++i) {
mapRank2pos[i] = pos_loglr[i].first;
}
// Calculate proposal distribution for gamma (unnormalized),
// and set up gsl_r and gsl_t.
gsl_rng_env_setup();
const gsl_rng_type *gslType;
gslType = gsl_rng_default;
if (randseed < 0) {
time_t rawtime;
time(&rawtime);
tm *ptm = gmtime(&rawtime);
randseed =
(unsigned)(ptm->tm_hour % 24 * 3600 + ptm->tm_min * 60 + ptm->tm_sec);
}
gsl_r = gsl_rng_alloc(gslType);
gsl_rng_set(gsl_r, randseed);
double *p_gamma = new double[ns_test];
CalcPgamma(p_gamma);
gsl_t = gsl_ran_discrete_preproc(ns_test, p_gamma);
// Initial parameters.
InitialMCMC(X, z, rank_old, cHyp_old, pos_loglr);
cHyp_initial = cHyp_old;
if (cHyp_old.n_gamma == 0) {
logPost_old = CalcPosterior(ztz, cHyp_old);
} else {
SetXgamma(Xgamma_old, X, rank_old);
CalcXtX(Xgamma_old, z, rank_old.size(), XtX_old, Xtz_old);
logPost_old = CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz,
rank_old.size(), Xb_old, beta_old, cHyp_old);
}
// Calculate centered z_hat, and pve.
if (a_mode == 13) {
if (cHyp_old.n_gamma == 0) {
CalcCC_PVEnZ(z_hat, cHyp_old);
} else {
CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
}
}
// Start MCMC.
int accept;
size_t total_step = w_step + s_step;
size_t w = 0, w_col, pos;
size_t repeat = 0;
for (size_t t = 0; t < total_step; ++t) {
if (t % d_pace == 0 || t == total_step - 1) {
ProgressBar("Running MCMC ", t, total_step - 1,
(double)n_accept / (double)(t * n_mh + 1));
}
if (a_mode == 13) {
SampleZ(y, z_hat, z);
mean_z = CenterVector(z);
gsl_blas_ddot(z, z, &ztz);
// First proposal.
if (cHyp_old.n_gamma == 0) {
logPost_old = CalcPosterior(ztz, cHyp_old);
} else {
gsl_matrix_view Xold_sub =
gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_old.size());
gsl_vector_view Xtz_sub =
gsl_vector_subvector(Xtz_old, 0, rank_old.size());
gsl_blas_dgemv(CblasTrans, 1.0, &Xold_sub.matrix, z, 0.0,
&Xtz_sub.vector);
logPost_old =
CalcPosterior(Xgamma_old, XtX_old, Xtz_old, ztz, rank_old.size(),
Xb_old, beta_old, cHyp_old);
}
}
// M-H steps.
for (size_t i = 0; i < n_mh; ++i) {
if (gsl_rng_uniform(gsl_r) < 0.33) {
repeat = 1 + gsl_rng_uniform_int(gsl_r, 20);
} else {
repeat = 1;
}
logMHratio = 0.0;
logMHratio += ProposeHnRho(cHyp_old, cHyp_new, repeat);
logMHratio +=
ProposeGamma(rank_old, rank_new, p_gamma, cHyp_old, cHyp_new, repeat);
logMHratio += ProposePi(cHyp_old, cHyp_new, repeat);
if (cHyp_new.n_gamma == 0) {
logPost_new = CalcPosterior(ztz, cHyp_new);
} else {
// This makes sure that rank_old.size() ==
// rank_remove.size() does not happen.
if (cHyp_new.n_gamma <= 20 || cHyp_old.n_gamma <= 20) {
time_start = clock();
SetXgamma(Xgamma_new, X, rank_new);
CalcXtX(Xgamma_new, z, rank_new.size(), XtX_new, Xtz_new);
time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
} else {
time_start = clock();
SetXgamma(X, Xgamma_old, XtX_old, Xtz_old, z, rank_old, rank_new,
Xgamma_new, XtX_new, Xtz_new);
time_set += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
}
time_start = clock();
logPost_new =
CalcPosterior(Xgamma_new, XtX_new, Xtz_new, ztz, rank_new.size(),
Xb_new, beta_new, cHyp_new);
time_post += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
}
logMHratio += logPost_new - logPost_old;
if (logMHratio > 0 || log(gsl_rng_uniform(gsl_r)) < logMHratio) {
accept = 1;
n_accept++;
} else {
accept = 0;
}
if (accept == 1) {
logPost_old = logPost_new;
cHyp_old = cHyp_new;
gsl_vector_memcpy(Xb_old, Xb_new);
rank_old.clear();
if (rank_new.size() != 0) {
for (size_t i = 0; i < rank_new.size(); ++i) {
rank_old.push_back(rank_new[i]);
}
gsl_matrix_view Xold_sub =
gsl_matrix_submatrix(Xgamma_old, 0, 0, ni_test, rank_new.size());
gsl_matrix_view XtXold_sub = gsl_matrix_submatrix(
XtX_old, 0, 0, rank_new.size(), rank_new.size());
gsl_vector_view Xtzold_sub =
gsl_vector_subvector(Xtz_old, 0, rank_new.size());
gsl_vector_view betaold_sub =
gsl_vector_subvector(beta_old, 0, rank_new.size());
gsl_matrix_view Xnew_sub =
gsl_matrix_submatrix(Xgamma_new, 0, 0, ni_test, rank_new.size());
gsl_matrix_view XtXnew_sub = gsl_matrix_submatrix(
XtX_new, 0, 0, rank_new.size(), rank_new.size());
gsl_vector_view Xtznew_sub =
gsl_vector_subvector(Xtz_new, 0, rank_new.size());
gsl_vector_view betanew_sub =
gsl_vector_subvector(beta_new, 0, rank_new.size());
gsl_matrix_memcpy(&Xold_sub.matrix, &Xnew_sub.matrix);
gsl_matrix_memcpy(&XtXold_sub.matrix, &XtXnew_sub.matrix);
gsl_vector_memcpy(&Xtzold_sub.vector, &Xtznew_sub.vector);
gsl_vector_memcpy(&betaold_sub.vector, &betanew_sub.vector);
}
} else {
cHyp_new = cHyp_old;
}
}
// Calculate z_hat, and pve.
if (a_mode == 13) {
if (cHyp_old.n_gamma == 0) {
CalcCC_PVEnZ(z_hat, cHyp_old);
} else {
CalcCC_PVEnZ(Xb_old, z_hat, cHyp_old);
}
// Sample mu and update z_hat.
gsl_vector_sub(z, z_hat);
mean_z += CenterVector(z);
mean_z += gsl_ran_gaussian(gsl_r, sqrt(1.0 / (double)ni_test));
gsl_vector_add_constant(z_hat, mean_z);
}
// Save data.
if (t < w_step) {
continue;
} else {
if (t % r_pace == 0) {
w_col = w % w_pace;
if (w_col == 0) {
if (w == 0) {
WriteResult(0, Result_hyp, Result_gamma, w_col);
} else {
WriteResult(1, Result_hyp, Result_gamma, w_col);
gsl_matrix_set_zero(Result_hyp);
gsl_matrix_set_zero(Result_gamma);
}
}
gsl_matrix_set(Result_hyp, w_col, 0, cHyp_old.h);
gsl_matrix_set(Result_hyp, w_col, 1, cHyp_old.pve);
gsl_matrix_set(Result_hyp, w_col, 2, cHyp_old.rho);
gsl_matrix_set(Result_hyp, w_col, 3, cHyp_old.pge);
gsl_matrix_set(Result_hyp, w_col, 4, cHyp_old.logp);
gsl_matrix_set(Result_hyp, w_col, 5, cHyp_old.n_gamma);
for (size_t i = 0; i < cHyp_old.n_gamma; ++i) {
pos = mapRank2pos[rank_old[i]] + 1;
gsl_matrix_set(Result_gamma, w_col, i, pos);
beta_g[pos - 1].first += gsl_vector_get(beta_old, i);
beta_g[pos - 1].second += 1.0;
}
if (a_mode == 13) {
pheno_mean += mean_z;
}
w++;
}
}
}
cout << endl;
cout << "time on selecting Xgamma: " << time_set << endl;
cout << "time on calculating posterior: " << time_post << endl;
w_col = w % w_pace;
WriteResult(1, Result_hyp, Result_gamma, w_col);
gsl_vector *alpha = gsl_vector_alloc(ns_test);
gsl_vector_set_zero(alpha);
WriteParam(beta_g, alpha, w);
gsl_vector_free(alpha);
gsl_matrix_free(Result_hyp);
gsl_matrix_free(Result_gamma);
gsl_vector_free(z_hat);
gsl_vector_free(z);
gsl_vector_free(Xb_new);
gsl_vector_free(Xb_old);
gsl_matrix_free(Xgamma_old);
gsl_matrix_free(XtX_old);
gsl_vector_free(Xtz_old);
gsl_vector_free(beta_old);
gsl_matrix_free(Xgamma_new);
gsl_matrix_free(XtX_new);
gsl_vector_free(Xtz_new);
gsl_vector_free(beta_new);
delete[] p_gamma;
beta_g.clear();
return;
}