diff options
Diffstat (limited to 'src/bslmm.h')
-rw-r--r-- | src/bslmm.h | 282 |
1 files changed, 136 insertions, 146 deletions
diff --git a/src/bslmm.h b/src/bslmm.h index c7768a2..d2dadbf 100644 --- a/src/bslmm.h +++ b/src/bslmm.h @@ -19,10 +19,10 @@ #ifndef __BSLMM_H__ #define __BSLMM_H__ -#include <vector> -#include <map> -#include <gsl/gsl_rng.h> #include <gsl/gsl_randist.h> +#include <gsl/gsl_rng.h> +#include <map> +#include <vector> #include "param.h" @@ -31,149 +31,139 @@ using namespace std; class BSLMM { public: - // IO-related parameters. - int a_mode; - size_t d_pace; - - string file_bfile; - string file_geno; - string file_out; - string path_out; - - // LMM-related parameters. - double l_min; - double l_max; - size_t n_region; - double pve_null; - double pheno_mean; - - // BSLMM MCMC-related parameters - double h_min, h_max, h_scale; // Priors for h. - double rho_min, rho_max, rho_scale; // Priors for rho. - double logp_min, logp_max, logp_scale; // Priors for log(pi). - size_t s_min, s_max; // Min. & max. number of gammas. - size_t w_step; // Number of warm up/burn in - // iterations. - size_t s_step; // Num. sampling iterations. - size_t r_pace; // Record pace. - size_t w_pace; // Write pace. - size_t n_accept; // Number of acceptances. - size_t n_mh; // Number of MH steps per iter. - double geo_mean; // Mean of geometric dist. - long int randseed; - double trace_G; - - HYPBSLMM cHyp_initial; - - // Summary statistics. - size_t ni_total, ns_total; // Number of total individuals and SNPs - size_t ni_test, ns_test; // Num. individuals & SNPs used in analysis. - size_t n_cvt; // Number of covariates. - double time_UtZ; - double time_Omega; // Time spent on optimization iterations. - - // Time spent on constructing the proposal distribution for - // gamma (i.e. lmm or lm analysis). - double time_Proposal; - - // Indicator for individuals (phenotypes): 0 missing, 1 - // available for analysis. - vector<int> indicator_idv; - - // Sequence indicator for SNPs: 0 ignored because of (a) maf, - // (b) miss, (c) non-poly; 1 available for analysis. - vector<int> indicator_snp; - - // Record SNP information. - vector<SNPINFO> snpInfo; - - // Not included in PARAM. - gsl_rng *gsl_r; - gsl_ran_discrete_t *gsl_t; - map<size_t, size_t> mapRank2pos; - - // Main functions. - void CopyFromParam (PARAM &cPar); - void CopyToParam (PARAM &cPar); - - void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *eval, - const double lambda); - - void MCMC (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - const gsl_vector *y); - void WriteLog (); - void WriteLR (); - void WriteBV (const gsl_vector *bv); - void WriteParam (vector<pair<double, double> > &beta_g, - const gsl_vector *alpha, const size_t w); - void WriteParam (const gsl_vector *alpha); - void WriteResult (const int flag, const gsl_matrix *Result_hyp, - const gsl_matrix *Result_gamma, const size_t w_col); - - // Subfunctions inside MCMC. - void CalcPgamma (double *p_gammar); - - double CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, - const double sigma_a2); - void InitialMCMC (const gsl_matrix *UtX, const gsl_vector *Uty, - vector<size_t> &rank_old, class HYPBSLMM &cHyp, - vector<pair<size_t, double> > &pos_loglr); - double CalcPosterior (const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *Utu, gsl_vector *alpha_prime, - class HYPBSLMM &cHyp); - double CalcPosterior (const gsl_matrix *UtXgamma, - const gsl_vector *Uty, const gsl_vector *K_eval, - gsl_vector *UtXb, gsl_vector *Utu, - gsl_vector *alpha_prime, gsl_vector *beta, - class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *Utu, - gsl_vector *z_hat, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_matrix *U, const gsl_vector *UtXb, - const gsl_vector *Utu, gsl_vector *z_hat, - class HYPBSLMM &cHyp); - double CalcREMLE (const gsl_matrix *Utw, const gsl_vector *Uty, - const gsl_vector *K_eval); - - // Calculate the maximum marginal likelihood ratio for each - // analyzed SNPs with gemma, use it to rank SNPs. - double CalcLR (const gsl_matrix *U, const gsl_matrix *UtX, - const gsl_vector *Uty, const gsl_vector *K_eval, - vector<pair<size_t, double> > &loglr_sort); - void SampleZ (const gsl_vector *y, const gsl_vector *z_hat, - gsl_vector *z); - double ProposeHnRho (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat); - double ProposePi (const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, - const size_t &repeat); - double ProposeGamma (const vector<size_t> &rank_old, - vector<size_t> &rank_new, const double *p_gamma, - const class HYPBSLMM &cHyp_old, - class HYPBSLMM &cHyp_new, const size_t &repeat); - void SetXgamma (gsl_matrix *Xgamma, const gsl_matrix *X, - vector<size_t> &rank); - - void CalcXtX (const gsl_matrix *X_new, const gsl_vector *y, - const size_t s_size, gsl_matrix *XtX_new, - gsl_vector *Xty_new); - void SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, - const gsl_matrix *XtX_old, const gsl_vector *Xty_old, - const gsl_vector *y, const vector<size_t> &rank_old, - const vector<size_t> &rank_new, gsl_matrix *X_new, - gsl_matrix *XtX_new, gsl_vector *Xty_new); - double CalcPosterior (const double yty, class HYPBSLMM &cHyp); - double CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, - const gsl_vector *Xty, const double yty, - const size_t s_size, gsl_vector *Xb, - gsl_vector *beta, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (gsl_vector *z_hat, class HYPBSLMM &cHyp); - void CalcCC_PVEnZ (const gsl_vector *Xb, gsl_vector *z_hat, - class HYPBSLMM &cHyp); - void MCMC (const gsl_matrix *X, const gsl_vector *y); + // IO-related parameters. + int a_mode; + size_t d_pace; + + string file_bfile; + string file_geno; + string file_out; + string path_out; + + // LMM-related parameters. + double l_min; + double l_max; + size_t n_region; + double pve_null; + double pheno_mean; + + // BSLMM MCMC-related parameters + double h_min, h_max, h_scale; // Priors for h. + double rho_min, rho_max, rho_scale; // Priors for rho. + double logp_min, logp_max, logp_scale; // Priors for log(pi). + size_t s_min, s_max; // Min. & max. number of gammas. + size_t w_step; // Number of warm up/burn in + // iterations. + size_t s_step; // Num. sampling iterations. + size_t r_pace; // Record pace. + size_t w_pace; // Write pace. + size_t n_accept; // Number of acceptances. + size_t n_mh; // Number of MH steps per iter. + double geo_mean; // Mean of geometric dist. + long int randseed; + double trace_G; + + HYPBSLMM cHyp_initial; + + // Summary statistics. + size_t ni_total, ns_total; // Number of total individuals and SNPs + size_t ni_test, ns_test; // Num. individuals & SNPs used in analysis. + size_t n_cvt; // Number of covariates. + double time_UtZ; + double time_Omega; // Time spent on optimization iterations. + + // Time spent on constructing the proposal distribution for + // gamma (i.e. lmm or lm analysis). + double time_Proposal; + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis. + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + // Record SNP information. + vector<SNPINFO> snpInfo; + + // Not included in PARAM. + gsl_rng *gsl_r; + gsl_ran_discrete_t *gsl_t; + map<size_t, size_t> mapRank2pos; + + // Main functions. + void CopyFromParam(PARAM &cPar); + void CopyToParam(PARAM &cPar); + + void RidgeR(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, + const gsl_vector *eval, const double lambda); + + void MCMC(const gsl_matrix *U, const gsl_matrix *UtX, const gsl_vector *Uty, + const gsl_vector *K_eval, const gsl_vector *y); + void WriteLog(); + void WriteLR(); + void WriteBV(const gsl_vector *bv); + void WriteParam(vector<pair<double, double>> &beta_g, const gsl_vector *alpha, + const size_t w); + void WriteParam(const gsl_vector *alpha); + void WriteResult(const int flag, const gsl_matrix *Result_hyp, + const gsl_matrix *Result_gamma, const size_t w_col); + + // Subfunctions inside MCMC. + void CalcPgamma(double *p_gammar); + + double CalcPveLM(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const double sigma_a2); + void InitialMCMC(const gsl_matrix *UtX, const gsl_vector *Uty, + vector<size_t> &rank_old, class HYPBSLMM &cHyp, + vector<pair<size_t, double>> &pos_loglr); + double CalcPosterior(const gsl_vector *Uty, const gsl_vector *K_eval, + gsl_vector *Utu, gsl_vector *alpha_prime, + class HYPBSLMM &cHyp); + double CalcPosterior(const gsl_matrix *UtXgamma, const gsl_vector *Uty, + const gsl_vector *K_eval, gsl_vector *UtXb, + gsl_vector *Utu, gsl_vector *alpha_prime, + gsl_vector *beta, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *Utu, + gsl_vector *z_hat, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(const gsl_matrix *U, const gsl_vector *UtXb, + const gsl_vector *Utu, gsl_vector *z_hat, + class HYPBSLMM &cHyp); + double CalcREMLE(const gsl_matrix *Utw, const gsl_vector *Uty, + const gsl_vector *K_eval); + + // Calculate the maximum marginal likelihood ratio for each + // analyzed SNPs with gemma, use it to rank SNPs. + double CalcLR(const gsl_matrix *U, const gsl_matrix *UtX, + const gsl_vector *Uty, const gsl_vector *K_eval, + vector<pair<size_t, double>> &loglr_sort); + void SampleZ(const gsl_vector *y, const gsl_vector *z_hat, gsl_vector *z); + double ProposeHnRho(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, + const size_t &repeat); + double ProposePi(const class HYPBSLMM &cHyp_old, class HYPBSLMM &cHyp_new, + const size_t &repeat); + double ProposeGamma(const vector<size_t> &rank_old, vector<size_t> &rank_new, + const double *p_gamma, const class HYPBSLMM &cHyp_old, + class HYPBSLMM &cHyp_new, const size_t &repeat); + void SetXgamma(gsl_matrix *Xgamma, const gsl_matrix *X, vector<size_t> &rank); + + void CalcXtX(const gsl_matrix *X_new, const gsl_vector *y, + const size_t s_size, gsl_matrix *XtX_new, gsl_vector *Xty_new); + void SetXgamma(const gsl_matrix *X, const gsl_matrix *X_old, + const gsl_matrix *XtX_old, const gsl_vector *Xty_old, + const gsl_vector *y, const vector<size_t> &rank_old, + const vector<size_t> &rank_new, gsl_matrix *X_new, + gsl_matrix *XtX_new, gsl_vector *Xty_new); + double CalcPosterior(const double yty, class HYPBSLMM &cHyp); + double CalcPosterior(const gsl_matrix *Xgamma, const gsl_matrix *XtX, + const gsl_vector *Xty, const double yty, + const size_t s_size, gsl_vector *Xb, gsl_vector *beta, + class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(gsl_vector *z_hat, class HYPBSLMM &cHyp); + void CalcCC_PVEnZ(const gsl_vector *Xb, gsl_vector *z_hat, + class HYPBSLMM &cHyp); + void MCMC(const gsl_matrix *X, const gsl_vector *y); }; #endif - - |