diff options
author | Peter Carbonetto | 2017-05-31 15:12:19 -0500 |
---|---|---|
committer | Peter Carbonetto | 2017-05-31 15:12:19 -0500 |
commit | f28b8531d135a948a5858844c6b6ce2bbfcf6813 (patch) | |
tree | 0843cf7823dc84a3bae5b89fce84951066e788c9 | |
parent | 38de84d5d6e72728c06a57f8eeea0bb975f7192e (diff) | |
download | pangemma-f28b8531d135a948a5858844c6b6ce2bbfcf6813.tar.gz |
Removed WITH_LAPACK compile/build option.
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | src/bslmm.cpp | 173 | ||||
-rw-r--r-- | src/bslmm.h | 16 | ||||
-rw-r--r-- | src/bslmmdap.cpp | 220 | ||||
-rw-r--r-- | src/bslmmdap.h | 18 | ||||
-rw-r--r-- | src/io.cpp | 139 | ||||
-rw-r--r-- | src/io.h | 72 | ||||
-rw-r--r-- | src/lapack.cpp | 94 | ||||
-rw-r--r-- | src/param.cpp | 2 | ||||
-rw-r--r-- | src/param.h | 2 |
10 files changed, 172 insertions, 567 deletions
@@ -4,14 +4,12 @@ # Unix / Linux LNX # Mac MAC # Compilation options -# link to LAPACK WITH_LAPACK # 32-bit binary FORCE_32BIT # dynamic compilation FORCE_DYNAMIC # Set this variable to either LNX or MAC SYS = LNX # Leave blank after "=" to disable; put "= 1" to enable -# Disable WITH_LAPACK option can slow computation speed significantly and is not recommended WITH_LAPACK = 1 FORCE_32BIT = FORCE_DYNAMIC = @@ -53,7 +51,6 @@ HDR += $(SRC_DIR)/param.h $(SRC_DIR)/gemma.h $(SRC_DIR)/io.h $(SRC_DIR)/lm.h $(S ifdef WITH_LAPACK OBJS += $(SRC_DIR)/lapack.o - CPPFLAGS += -DWITH_LAPACK ifeq ($(SYS), MAC) LIBS += $(LIBS_MAC_D_LAPACK) else diff --git a/src/bslmm.cpp b/src/bslmm.cpp index 55a05ca..0bd4234 100644 --- a/src/bslmm.cpp +++ b/src/bslmm.cpp @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> @@ -38,30 +38,15 @@ #include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" - - - #include "lapack.h" - -#ifdef FORCE_FLOAT -#include "param_float.h" -#include "bslmm_float.h" -#include "lmm_float.h" //for class FUNC_PARAM and MatrixCalcLR -#include "lm_float.h" -#include "mathfunc_float.h" //for function CenterVector -#else #include "param.h" #include "bslmm.h" #include "lmm.h" #include "lm.h" #include "mathfunc.h" -#endif using namespace std; - - - void BSLMM::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; @@ -330,11 +315,8 @@ double BSLMM::CalcPveLM (const gsl_matrix *UtXgamma, const gsl_vector *Uty, cons gsl_matrix_set_identity (Omega); gsl_matrix_scale (Omega, 1.0/sigma_a2); -#ifdef WITH_LAPACK - lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, 1.0, Omega); -#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, UtXgamma, UtXgamma, 1.0, Omega); -#endif + lapack_dgemm ((char *)"T", (char *)"N", 1.0, UtXgamma, UtXgamma, + 1.0, Omega); gsl_blas_dgemv (CblasTrans, 1.0, UtXgamma, Uty, 0.0, Xty); CholeskySolve(Omega, Xty, OiXty); @@ -517,11 +499,8 @@ double BSLMM::CalcPosterior (const gsl_matrix *UtXgamma, const gsl_vector *Uty, gsl_matrix_set_identity (Omega); time_start=clock(); -#ifdef WITH_LAPACK - lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega); -#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega); -#endif + lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, + UtXgamma, 1.0, Omega); time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); @@ -1229,11 +1208,8 @@ void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_si gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size); -#ifdef WITH_LAPACK - lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix); -#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix); -#endif + lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, + &X_sub.matrix, 0.0, &XtX_sub.matrix); gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector); time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); @@ -1282,127 +1258,6 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m gsl_vector_view Xtynew_sub=gsl_vector_subvector(Xty_new, 0, rank_new.size()); //get X_new and calculate XtX_new - /* - if (rank_remove.size()==0 && rank_add.size()==0) { - gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix); - gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix); - gsl_vector_memcpy(&Xtynew_sub.vector, &Xtyold_sub.vector); - } else { - gsl_matrix *X_temp=gsl_matrix_alloc(X_old->size1, rank_old.size()-rank_remove.size() ); - gsl_matrix *XtX_temp=gsl_matrix_alloc(X_temp->size2, X_temp->size2); - gsl_vector *Xty_temp=gsl_vector_alloc(X_temp->size2); - - if (rank_remove.size()==0) { - gsl_matrix_memcpy (X_temp, &Xold_sub.matrix); - gsl_matrix_memcpy (XtX_temp, &XtXold_sub.matrix); - gsl_vector_memcpy (Xty_temp, &Xtyold_sub.vector); - } else { - size_t i_temp=0, j_temp; - for (size_t i=0; i<rank_old.size(); i++) { - if (mapRank2in_remove.count(rank_old[i])!=0) {continue;} - gsl_vector_const_view Xold_col=gsl_matrix_const_column(X_old, i); - gsl_vector_view Xtemp_col=gsl_matrix_column(X_temp, i_temp); - gsl_vector_memcpy (&Xtemp_col.vector, &Xold_col.vector); - - d=gsl_vector_get (Xty_old, i); - gsl_vector_set (Xty_temp, i_temp, d); - - j_temp=i_temp; - for (size_t j=i; j<rank_old.size(); j++) { - if (mapRank2in_remove.count(rank_old[j])!=0) {continue;} - d=gsl_matrix_get (XtX_old, i, j); - gsl_matrix_set (XtX_temp, i_temp, j_temp, d); - if (i_temp!=j_temp) {gsl_matrix_set (XtX_temp, j_temp, i_temp, d);} - j_temp++; - } - i_temp++; - } - } - - if (rank_add.size()==0) { - gsl_matrix_memcpy (&Xnew_sub.matrix, X_temp); - gsl_matrix_memcpy (&XtXnew_sub.matrix, XtX_temp); - gsl_vector_memcpy (&Xtynew_sub.vector, Xty_temp); - } else { - gsl_matrix *X_add=gsl_matrix_alloc(X_old->size1, rank_add.size() ); - gsl_matrix *XtX_aa=gsl_matrix_alloc(X_add->size2, X_add->size2); - gsl_matrix *XtX_at=gsl_matrix_alloc(X_add->size2, X_temp->size2); - gsl_vector *Xty_add=gsl_vector_alloc(X_add->size2); - - //get X_add - SetXgamma (X_add, X, rank_add); - - //get t(X_add)X_add and t(X_add)X_temp - clock_t time_start=clock(); - - //somehow the lapack_dgemm does not work here - //#ifdef WITH_LAPACK - //lapack_dgemm ((char *)"T", (char *)"N", 1.0, X_add, X_add, 0.0, XtX_aa); - //lapack_dgemm ((char *)"T", (char *)"N", 1.0, X_add, X_temp, 0.0, XtX_at); - - //#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_temp, 0.0, XtX_at); - //#endif - gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add); - - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //save to X_new, XtX_new and Xty_new - size_t i_temp=0, j_temp, i_flag=0, j_flag=0; - for (size_t i=0; i<rank_new.size(); i++) { - if (mapRank2in_add.count(rank_new[i])!=0) {i_flag=1;} else {i_flag=0;} - gsl_vector_view Xnew_col=gsl_matrix_column(X_new, i); - if (i_flag==1) { - gsl_vector_view Xcopy_col=gsl_matrix_column(X_add, i-i_temp); - gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); - } else { - gsl_vector_view Xcopy_col=gsl_matrix_column(X_temp, i_temp); - gsl_vector_memcpy (&Xnew_col.vector, &Xcopy_col.vector); - } - - if (i_flag==1) { - d=gsl_vector_get (Xty_add, i-i_temp); - } else { - d=gsl_vector_get (Xty_temp, i_temp); - } - gsl_vector_set (Xty_new, i, d); - - j_temp=i_temp; - for (size_t j=i; j<rank_new.size(); j++) { - if (mapRank2in_add.count(rank_new[j])!=0) {j_flag=1;} else {j_flag=0;} - - if (i_flag==1 && j_flag==1) { - d=gsl_matrix_get(XtX_aa, i-i_temp, j-j_temp); - } else if (i_flag==1) { - d=gsl_matrix_get(XtX_at, i-i_temp, j_temp); - } else if (j_flag==1) { - d=gsl_matrix_get(XtX_at, j-j_temp, i_temp); - } else { - d=gsl_matrix_get(XtX_temp, i_temp, j_temp); - } - - gsl_matrix_set (XtX_new, i, j, d); - if (i!=j) {gsl_matrix_set (XtX_new, j, i, d);} - - if (j_flag==0) {j_temp++;} - } - if (i_flag==0) {i_temp++;} - } - - gsl_matrix_free(X_add); - gsl_matrix_free(XtX_aa); - gsl_matrix_free(XtX_at); - gsl_vector_free(Xty_add); - } - - gsl_matrix_free(X_temp); - gsl_matrix_free(XtX_temp); - gsl_vector_free(Xty_temp); - } - */ - - if (rank_remove.size()==0 && rank_add.size()==0) { gsl_matrix_memcpy(&Xnew_sub.matrix, &Xold_sub.matrix); gsl_matrix_memcpy(&XtXnew_sub.matrix, &XtXold_sub.matrix); @@ -1447,14 +1302,10 @@ void BSLMM::SetXgamma (const gsl_matrix *X, const gsl_matrix *X_old, const gsl_m clock_t time_start=clock(); //somehow the lapack_dgemm does not work here - //#ifdef WITH_LAPACK - //lapack_dgemm ((char *)"T", (char *)"N", 1.0, X_add, X_add, 0.0, XtX_aa); - //lapack_dgemm ((char *)"T", (char *)"N", 1.0, X_add, X_temp, 0.0, XtX_at); - - //#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, 0.0, XtX_aa); - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old, 0.0, XtX_ao); - //#endif + gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_add, + 0.0, XtX_aa); + gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, X_add, X_old, + 0.0, XtX_ao); gsl_blas_dgemv(CblasTrans, 1.0, X_add, y, 0.0, Xty_add); time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); diff --git a/src/bslmm.h b/src/bslmm.h index 8b5edc7..863a22d 100644 --- a/src/bslmm.h +++ b/src/bslmm.h @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ @@ -25,24 +25,14 @@ #include <gsl/gsl_rng.h> #include <gsl/gsl_randist.h> -#ifdef FORCE_FLOAT -#include "param_float.h" -#else #include "param.h" -#endif - using namespace std; - - - - - class BSLMM { public: - // IO related parameters + // IO-related parameters. int a_mode; size_t d_pace; diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp index f6e9e9c..c66189b 100644 --- a/src/bslmmdap.cpp +++ b/src/bslmmdap.cpp @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> @@ -38,31 +38,17 @@ #include "gsl/gsl_cdf.h" #include "gsl/gsl_roots.h" - - #include "logistic.h" #include "lapack.h" #include "io.h" - -#ifdef FORCE_FLOAT -#include "param_float.h" -#include "bslmmdap_float.h" -#include "lmm_float.h" //for class FUNC_PARAM and MatrixCalcLR -#include "lm_float.h" -#include "mathfunc_float.h" //for function CenterVector -#else #include "param.h" #include "bslmmdap.h" #include "lmm.h" #include "lm.h" #include "mathfunc.h" -#endif using namespace std; - - - void BSLMMDAP::CopyFromParam (PARAM &cPar) { file_out=cPar.file_out; @@ -506,11 +492,8 @@ double BSLMMDAP::CalcMarginal (const gsl_matrix *UtXgamma, const gsl_vector *Uty gsl_matrix_set_identity (Omega); time_start=clock(); -#ifdef WITH_LAPACK - lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega); -#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, sigma_a2, UtXgamma_eval, UtXgamma, 1.0, Omega); -#endif + lapack_dgemm ((char *)"T", (char *)"N", sigma_a2, UtXgamma_eval, + UtXgamma, 1.0, Omega); time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); //calculate beta_hat @@ -819,198 +802,3 @@ void BSLMMDAP::DAP_EstimateHyper (const size_t kc, const size_t kd, const vector gsl_vector_free(coef); return; } - -/* -//readin the estimated hyper-parameters and perform fine mapping for each region -void BSLMM::DAP_FineMapping (const gsl_matrix *U, const gsl_matrix *UtX, const gsl_matrix *A, const gsl_vector *Uty, const gsl_vector *K_eval, const gsl_vector *y, gsl_matrix *Hyper, gsl_vector *alpha, gsl_vector *pip) { - clock_t time_start; - - //two priority sets: S_1 contains all candidate causal SNPs; S_2 contains the prioritized combintion of them - //two marginal probability sets: P_1 contains marginals for S_1; P_2 contains marginals for S_2; - set<size_t> S1set, S2set; - vector<size_t> S1vec; - vector<set<size_t> > S2vec; - vector<double> P1, P2; - - //calculate P0 (null) and P1 (for every SNP) - - - - //loop through the number of combinations - for (size_t s=0; s<p; s++) { - //if (s==0), set up S_1: compute marginal of the null model, then compute P_1, then compute BF_1 and use them to select S_1; compute C_1 - - - - //if (s==1), set up S_2: compute pair-wise P_2 and use them to select S_2; compute C_2 - - //otherwise, match each combination of S_2 with each SNP from S_1, select into S_3; and replace S_2 with S_3; compute C_s - - - //stop when the stopping critieria are reached (if S_2 is empty; if t; if kappa); add the residual component R - - for (size_t t=0; t<total_step; ++t) { - if (t%d_pace==0 || t==total_step-1) {ProgressBar ("Running MCMC ", t, total_step-1, (double)n_accept/(double)(t*n_mh+1));} -// if (t>10) {break;} - - if (a_mode==13) { - SampleZ (y, z_hat, z); - mean_z=CenterVector (z); - - time_start=clock(); - gsl_blas_dgemv (CblasTrans, 1.0, U, z, 0.0, Utz); - time_UtZ+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //First proposal - if (cHyp_old.n_gamma==0 || cHyp_old.rho==0) { - logPost_old=CalcPosterior(Utz, K_eval, Utu_old, alpha_old, cHyp_old); - beta_old.clear(); - for (size_t i=0; i<cHyp_old.n_gamma; ++i) { - beta_old.push_back(0); - } - } - else { - gsl_matrix *UtXgamma=gsl_matrix_alloc (ni_test, cHyp_old.n_gamma); - gsl_vector *beta=gsl_vector_alloc (cHyp_old.n_gamma); - SetXgamma (UtXgamma, UtX, rank_old); - logPost_old=CalcPosterior(UtXgamma, Utz, K_eval, UtXb_old, Utu_old, alpha_old, beta, cHyp_old); - - beta_old.clear(); - for (size_t i=0; i<beta->size; ++i) { - beta_old.push_back(gsl_vector_get(beta, i)); - } - gsl_matrix_free (UtXgamma); - gsl_vector_free (beta); - } - } - - - delete [] p_gamma; - beta_g.clear(); - - return; -} - -*/ - - - - - - -/* -//below fits MCMC for rho=1 -void BSLMM::CalcXtX (const gsl_matrix *X, const gsl_vector *y, const size_t s_size, gsl_matrix *XtX, gsl_vector *Xty) -{ - time_t time_start=clock(); - gsl_matrix_const_view X_sub=gsl_matrix_const_submatrix(X, 0, 0, X->size1, s_size); - gsl_matrix_view XtX_sub=gsl_matrix_submatrix(XtX, 0, 0, s_size, s_size); - gsl_vector_view Xty_sub=gsl_vector_subvector(Xty, 0, s_size); - -#ifdef WITH_LAPACK - lapack_dgemm ((char *)"T", (char *)"N", 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix); -#else - gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, &X_sub.matrix, &X_sub.matrix, 0.0, &XtX_sub.matrix); -#endif - gsl_blas_dgemv(CblasTrans, 1.0, &X_sub.matrix, y, 0.0, &Xty_sub.vector); - - time_Omega+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - return; -} - - - -double BSLMM::CalcPosterior (const double yty, class HYPBSLMM &cHyp) -{ - double logpost=0.0; - - //for quantitative traits, calculate pve and pge - //pve and pge for case/control data are calculted in CalcCC_PVEnZ - if (a_mode==11) { - cHyp.pve=0.0; - cHyp.pge=1.0; - } - - //calculate likelihood - if (a_mode==11) {logpost-=0.5*(double)ni_test*log(yty);} - else {logpost-=0.5*yty;} - - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1-exp(cHyp.logp)); - - return logpost; -} - - -double BSLMM::CalcPosterior (const gsl_matrix *Xgamma, const gsl_matrix *XtX, const gsl_vector *Xty, const double yty, const size_t s_size, gsl_vector *Xb, gsl_vector *beta, class HYPBSLMM &cHyp) -{ - double sigma_a2=cHyp.h/( (1-cHyp.h)*exp(cHyp.logp)*(double)ns_test); - double logpost=0.0; - double d, P_yy=yty, logdet_O=0.0; - - gsl_matrix_const_view Xgamma_sub=gsl_matrix_const_submatrix (Xgamma, 0, 0, Xgamma->size1, s_size); - gsl_matrix_const_view XtX_sub=gsl_matrix_const_submatrix (XtX, 0, 0, s_size, s_size); - gsl_vector_const_view Xty_sub=gsl_vector_const_subvector (Xty, 0, s_size); - - gsl_matrix *Omega=gsl_matrix_alloc (s_size, s_size); - gsl_matrix *M_temp=gsl_matrix_alloc (s_size, s_size); - gsl_vector *beta_hat=gsl_vector_alloc (s_size); - gsl_vector *Xty_temp=gsl_vector_alloc (s_size); - - gsl_vector_memcpy (Xty_temp, &Xty_sub.vector); - - //calculate Omega - gsl_matrix_memcpy (Omega, &XtX_sub.matrix); - gsl_matrix_scale (Omega, sigma_a2); - gsl_matrix_set_identity (M_temp); - gsl_matrix_add (Omega, M_temp); - - //calculate beta_hat - logdet_O=CholeskySolve(Omega, Xty_temp, beta_hat); - gsl_vector_scale (beta_hat, sigma_a2); - - gsl_blas_ddot (Xty_temp, beta_hat, &d); - P_yy-=d; - - //sample tau - double tau=1.0; - if (a_mode==11) {tau =gsl_ran_gamma (gsl_r, (double)ni_test/2.0, 2.0/P_yy); } - - //sample beta - for (size_t i=0; i<s_size; i++) - { - d=gsl_ran_gaussian(gsl_r, 1); - gsl_vector_set(beta, i, d); - } - gsl_vector_view beta_sub=gsl_vector_subvector(beta, 0, s_size); - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, &beta_sub.vector); - - //it compuates inv(L^T(Omega)) %*% beta; - gsl_vector_scale(&beta_sub.vector, sqrt(sigma_a2/tau)); - gsl_vector_add(&beta_sub.vector, beta_hat); - gsl_blas_dgemv (CblasNoTrans, 1.0, &Xgamma_sub.matrix, &beta_sub.vector, 0.0, Xb); - - //for quantitative traits, calculate pve and pge - if (a_mode==11) { - gsl_blas_ddot (Xb, Xb, &d); - cHyp.pve=d/(double)ni_test; - cHyp.pve/=cHyp.pve+1.0/tau; - cHyp.pge=1.0; - } - - logpost=-0.5*logdet_O; - if (a_mode==11) {logpost-=0.5*(double)ni_test*log(P_yy);} - else {logpost-=0.5*P_yy;} - - logpost+=((double)cHyp.n_gamma-1.0)*cHyp.logp+((double)ns_test-(double)cHyp.n_gamma)*log(1.0-exp(cHyp.logp)); - - gsl_matrix_free (Omega); - gsl_matrix_free (M_temp); - gsl_vector_free (beta_hat); - gsl_vector_free (Xty_temp); - - return logpost; -} -*/ - - diff --git a/src/bslmmdap.h b/src/bslmmdap.h index ac78f97..a9e2d51 100644 --- a/src/bslmmdap.h +++ b/src/bslmmdap.h @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,10 +13,9 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ - #ifndef __BSLMMDAP_H__ #define __BSLMMDAP_H__ @@ -24,25 +23,14 @@ #include <map> #include <gsl/gsl_rng.h> #include <gsl/gsl_randist.h> - -#ifdef FORCE_FLOAT -#include "param_float.h" -#else #include "param.h" -#endif - using namespace std; - - - - - class BSLMMDAP { public: - // IO related parameters + // IO-related parameters. int a_mode; size_t d_pace; @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011-2017 Xiang Zhou + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -194,11 +194,13 @@ bool ReadFile_snps_header (const string &file_snps, set<string> &setSnps) { return true; } -//Read log file -bool ReadFile_log (const string &file_log, double &pheno_mean) -{ +// Read log file. +bool ReadFile_log (const string &file_log, double &pheno_mean) { ifstream infile (file_log.c_str(), ifstream::in); - if (!infile) {cout<<"error! fail to open log file: "<<file_log<<endl; return false;} + if (!infile) { + cout << "error! fail to open log file: " << file_log << endl; + return false; + } string line; char *ch_ptr; @@ -229,15 +231,18 @@ bool ReadFile_log (const string &file_log, double &pheno_mean) return true; } - -//Read bimbam annotation file -bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map<string, long int> &mapRS2bp, map<string, double> &mapRS2cM) -{ +// Read bimbam annotation file. +bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, + map<string, long int> &mapRS2bp, + map<string, double> &mapRS2cM) { mapRS2chr.clear(); mapRS2bp.clear(); ifstream infile (file_anno.c_str(), ifstream::in); - if (!infile) {cout<<"error opening annotation file: "<<file_anno<<endl; return false;} + if (!infile) { + cout << "error opening annotation file: " << file_anno << endl; + return false; + } string line; char *ch_ptr; @@ -251,11 +256,23 @@ bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map ch_ptr=strtok ((char *)line.c_str(), " , \t"); rs=ch_ptr; ch_ptr=strtok (NULL, " , \t"); - if (strcmp(ch_ptr, "NA")==0) {b_pos=-9;} else {b_pos=atol(ch_ptr);} + if (strcmp(ch_ptr, "NA")==0) { + b_pos=-9; + } else { + b_pos=atol(ch_ptr); + } ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {chr="-9";} else {chr=ch_ptr;} + if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) { + chr="-9"; + } else { + chr=ch_ptr; + } ch_ptr=strtok (NULL, " , \t"); - if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) {cM=-9;} else {cM=atof(ch_ptr);} + if (ch_ptr==NULL || strcmp(ch_ptr, "NA")==0) { + cM=-9; + } else { + cM=atof(ch_ptr); + } mapRS2chr[rs]=chr; mapRS2bp[rs]=b_pos; @@ -268,15 +285,17 @@ bool ReadFile_anno (const string &file_anno, map<string, string> &mapRS2chr, map return true; } -//read one column of phenotype -bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vector<double> &pheno, const int &p_column) -{ +// Read 1 column of phenotype. +bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, + vector<double> &pheno, const int &p_column) { indicator_idv.clear(); pheno.clear(); igzstream infile (file_pheno.c_str(), igzstream::in); -// ifstream infile (file_pheno.c_str(), ifstream::in); - if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;} + if (!infile) { + cout << "error! fail to open phenotype file: " << file_pheno << endl; + return false; + } string line; char *ch_ptr; @@ -288,8 +307,15 @@ bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vect for (int i=0; i<(p_column-1); ++i) { ch_ptr=strtok (NULL, " , \t"); } - if (strcmp(ch_ptr, "NA")==0) {indicator_idv.push_back(0); pheno.push_back(-9);} //pheno is different from pimass2 - else {p=atof(ch_ptr); indicator_idv.push_back(1); pheno.push_back(p);} + if (strcmp(ch_ptr, "NA")==0) { + indicator_idv.push_back(0); + pheno.push_back(-9); + } // Pheno is different from pimass2. + else { + p=atof(ch_ptr); + indicator_idv.push_back(1); + pheno.push_back(p); + } } infile.close(); @@ -298,17 +324,19 @@ bool ReadFile_column (const string &file_pheno, vector<int> &indicator_idv, vect return true; } - - -//Read bimbam phenotype file, p_column=1, 2 ... -bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, const vector<size_t> &p_column) -{ +// Read bimbam phenotype file, p_column=1, 2,... +bool ReadFile_pheno (const string &file_pheno, + vector<vector<int> > &indicator_pheno, + vector<vector<double> > &pheno, + const vector<size_t> &p_column) { indicator_pheno.clear(); pheno.clear(); igzstream infile (file_pheno.c_str(), igzstream::in); -// ifstream infile (file_pheno.c_str(), ifstream::in); - if (!infile) {cout<<"error! fail to open phenotype file: "<<file_pheno<<endl; return false;} + if (!infile) { + cout << "error! fail to open phenotype file: " << file_pheno << endl; + return false; + } string line; char *ch_ptr; @@ -333,8 +361,15 @@ bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_p size_t i=0; while (i<p_max ) { if (mapP2c.count(i+1)!=0) { - if (strcmp(ch_ptr, "NA")==0) {ind_pheno_row[mapP2c[i+1]]=0; pheno_row[mapP2c[i+1]]=-9;} - else {p=atof(ch_ptr); ind_pheno_row[mapP2c[i+1]]=1; pheno_row[mapP2c[i+1]]=p;} + if (strcmp(ch_ptr, "NA")==0) { + ind_pheno_row[mapP2c[i+1]]=0; + pheno_row[mapP2c[i+1]]=-9; + } + else { + p=atof(ch_ptr); + ind_pheno_row[mapP2c[i+1]]=1; + pheno_row[mapP2c[i+1]]=p; + } } i++; ch_ptr=strtok (NULL, " , \t"); @@ -350,13 +385,15 @@ bool ReadFile_pheno (const string &file_pheno, vector<vector<int> > &indicator_p return true; } - -bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<vector<double> > &cvt, size_t &n_cvt) -{ +bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, + vector<vector<double> > &cvt, size_t &n_cvt) { indicator_cvt.clear(); ifstream infile (file_cvt.c_str(), ifstream::in); - if (!infile) {cout<<"error! fail to open covariates file: "<<file_cvt<<endl; return false;} + if (!infile) { + cout << "error! fail to open covariates file: " << file_cvt << endl; + return false; + } string line; char *ch_ptr; @@ -374,7 +411,11 @@ bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<ve v_d.push_back(d); ch_ptr=strtok (NULL, " , \t"); } - if (flag_na==0) {indicator_cvt.push_back(1);} else {indicator_cvt.push_back(0);} + if (flag_na==0) { + indicator_cvt.push_back(1); + } else { + indicator_cvt.push_back(0); + } cvt.push_back(v_d); } @@ -382,10 +423,16 @@ bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<ve else { flag_na=0; for (vector<int>::size_type i=0; i<indicator_cvt.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} + if (indicator_cvt[i]==0) { + continue; + } if (flag_na==0) {flag_na=1; n_cvt=cvt[i].size();} - if (flag_na!=0 && n_cvt!=cvt[i].size()) {cout<<"error! number of covariates in row "<<i<<" do not match other rows."<<endl; return false;} + if (flag_na!=0 && n_cvt!=cvt[i].size()) { + cout << "error! number of covariates in row " << + i << " do not match other rows." << endl; + return false; + } } } @@ -395,15 +442,15 @@ bool ReadFile_cvt (const string &file_cvt, vector<int> &indicator_cvt, vector<ve return true; } - - -//Read .bim file -bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) -{ - snpInfo.clear(); +// Read .bim file. +bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) { + snpInfo.clear(); ifstream infile (file_bim.c_str(), ifstream::in); - if (!infile) {cout<<"error opening .bim file: "<<file_bim<<endl; return false;} + if (!infile) { + cout << "error opening .bim file: " << file_bim << endl; + return false; + } string line; char *ch_ptr; @@ -429,7 +476,8 @@ bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) ch_ptr=strtok (NULL, " \t"); major=ch_ptr; - SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0}; + SNPINFO sInfo={chr, rs, cM, b_pos, minor, major, + 0, -9, -9, 0, 0, 0}; snpInfo.push_back(sInfo); } @@ -438,8 +486,7 @@ bool ReadFile_bim (const string &file_bim, vector<SNPINFO> &snpInfo) return true; } - -//Read .fam file +// Read .fam file. bool ReadFile_fam (const string &file_fam, vector<vector<int> > &indicator_pheno, vector<vector<double> > &pheno, map<string, int> &mapID2num, const vector<size_t> &p_column) { indicator_pheno.clear(); @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011-2017 Xiang Zhou + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -121,22 +121,60 @@ bool ReadFile_gene (const string &file_gene, vector<double> &vec_read, vector<SNPINFO> &snpInfo, size_t &ng_total); bool ReadHeader_io (const string &line, HEADER &header); -bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, size_t &n_vc); -bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, size_t &n_vc); - -bool ReadFile_catc (const string &file_cat, map<string, vector<double> > &mapRS2catc, size_t &n_cat); -bool ReadFile_mcatc (const string &file_mcat, map<string, vector<double> > &mapRS2catc, size_t &n_cat); - -bool BimbamKin (const string &file_geno, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns); -bool PlinkKin (const string &file_bed, const int display_pace, const vector<int> &indicator_idv, const vector<int> &indicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<SNPINFO> &snpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns); -bool MFILEKin (const size_t mfile_mode, const string &file_mfile, const int display_pace, const vector<int> &indicator_idv, const vector<vector<int> > &mindicator_snp, const map<string, double> &mapRS2weight, const map<string, size_t> &mapRS2cat, const vector<vector<SNPINFO> > &msnpInfo, const gsl_matrix *W, gsl_matrix *matrix_kin, gsl_vector *vector_ns); - -bool ReadFile_wsnp (const string &file_wsnp, map<string, double> &mapRS2double); -bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, map<string, vector<double> > &mapRS2vector); - -void ReadFile_beta (const string &file_beta, const map<string, size_t> &mapRS2cat, const map<string, double> &mapRS2wA, vector<size_t> &vec_cat, vector<size_t> &vec_ni, vector<double> &vec_weight, vector<double> &vec_z2, size_t &ni_total, size_t &ns_total, size_t &ns_test); -void ReadFile_beta (const string &file_beta, const map<string, double> &mapRS2wA, map<string, string> &mapRS2A1, map<string, double> &mapRS2z); -void Calcq (const size_t n_block, const vector<size_t> &vec_cat, const vector<size_t> &vec_ni, const vector<double> &vec_weight, const vector<double> &vec_z2, gsl_matrix *Vq, gsl_vector *q, gsl_vector *s); +bool ReadFile_cat (const string &file_cat, map<string, size_t> &mapRS2cat, + size_t &n_vc); +bool ReadFile_mcat (const string &file_mcat, map<string, size_t> &mapRS2cat, + size_t &n_vc); + +bool ReadFile_catc (const string &file_cat, + map<string, vector<double> > &mapRS2catc, + size_t &n_cat); +bool ReadFile_mcatc (const string &file_mcat, map<string, + vector<double> > &mapRS2catc, size_t &n_cat); + +bool BimbamKin (const string &file_geno, const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, const gsl_matrix *W, + gsl_matrix *matrix_kin, gsl_vector *vector_ns); +bool PlinkKin (const string &file_bed, const int display_pace, + const vector<int> &indicator_idv, + const vector<int> &indicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<SNPINFO> &snpInfo, + const gsl_matrix *W, gsl_matrix *matrix_kin, + gsl_vector *vector_ns); +bool MFILEKin (const size_t mfile_mode, const string &file_mfile, + const int display_pace, const vector<int> &indicator_idv, + const vector<vector<int> > &mindicator_snp, + const map<string, double> &mapRS2weight, + const map<string, size_t> &mapRS2cat, + const vector<vector<SNPINFO> > &msnpInfo, + const gsl_matrix *W, gsl_matrix *matrix_kin, + gsl_vector *vector_ns); + +bool ReadFile_wsnp (const string &file_wsnp, + map<string, double> &mapRS2double); +bool ReadFile_wsnp (const string &file_wcat, const size_t n_vc, + map<string, vector<double> > &mapRS2vector); + +void ReadFile_beta (const string &file_beta, + const map<string, size_t> &mapRS2cat, + const map<string, double> &mapRS2wA, + vector<size_t> &vec_cat, vector<size_t> &vec_ni, + vector<double> &vec_weight, vector<double> &vec_z2, + size_t &ni_total, size_t &ns_total, size_t &ns_test); +void ReadFile_beta (const string &file_beta, + const map<string, double> &mapRS2wA, + map<string, string> &mapRS2A1, + map<string, double> &mapRS2z); +void Calcq (const size_t n_block, const vector<size_t> &vec_cat, + const vector<size_t> &vec_ni, + const vector<double> &vec_weight, const vector<double> &vec_z2, + gsl_matrix *Vq, gsl_vector *q, gsl_vector *s); void ReadFile_study (const string &file_study, gsl_matrix *Vq, gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni); diff --git a/src/lapack.cpp b/src/lapack.cpp index 18399a2..2bbdf62 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -398,13 +398,7 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, // DO NOT set eigenvalues to be positive. double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix) { -#ifdef WITH_LAPACK lapack_eigen_symmv (G, eval, U, flag_largematrix); -#else - gsl_eigen_symmv_workspace *w=gsl_eigen_symmv_alloc (G->size1); - gsl_eigen_symmv (G, eval, U, w); - gsl_eigen_symmv_free (w); -#endif // Calculate track_G=mean(diag(G)). double d=0.0; @@ -420,48 +414,7 @@ double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, // DO NOT set eigen values to be positive. double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, gsl_vector_float *eval, const size_t flag_largematrix) { -#ifdef WITH_LAPACK lapack_float_eigen_symmv (G, eval, U, flag_largematrix); -#else - - // GSL doesn't provide single precision eigen decomposition; - // plus, float precision eigen decomposition in LAPACK may not - // work on OS 10.4 first change to double precision. - gsl_matrix *G_double=gsl_matrix_alloc (G->size1, G->size2); - gsl_matrix *U_double=gsl_matrix_alloc (U->size1, U->size2); - gsl_vector *eval_double=gsl_vector_alloc (eval->size); - for (size_t i=0; i<G->size1; i++) { - for (size_t j=0; j<G->size2; j++) { - gsl_matrix_set(G_double, i, j, - gsl_matrix_float_get(G, i, j)); - } - } - gsl_eigen_symmv_workspace *w_space=gsl_eigen_symmv_alloc (G->size1); - gsl_eigen_symmv (G_double, eval_double, U_double, w_space); - gsl_eigen_symmv_free (w_space); - - // Change back to float precision. - for (size_t i=0; i<G->size1; i++) { - for (size_t j=0; j<G->size2; j++) { - gsl_matrix_float_set(K, i, j, - gsl_matrix_get(G_double, i, j)); - } - } - for (size_t i=0; i<U->size1; i++) { - for (size_t j=0; j<U->size2; j++) { - gsl_matrix_float_set(U, i, j, - gsl_matrix_get(U_double, i, j)); - } - } - for (size_t i=0; i<eval->size; i++) { - gsl_vector_float_set(eval, i, gsl_vector_get(eval_double, i)); - } - - // Delete double-precision matrices. - gsl_matrix_free (G_double); - gsl_matrix_free (U_double); - gsl_vector_free (eval_double); -#endif // Calculate track_G=mean(diag(G)). double d = 0.0; @@ -477,28 +430,12 @@ double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) { double logdet_O=0.0; -#ifdef WITH_LAPACK lapack_cholesky_decomp(Omega); for (size_t i=0; i<Omega->size1; ++i) { logdet_O+=log(gsl_matrix_get (Omega, i, i)); } logdet_O*=2.0; lapack_cholesky_solve(Omega, Xty, OiXty); -#else - int status = gsl_linalg_cholesky_decomp(Omega); - if(status == GSL_EDOM) { - cout << "## non-positive definite matrix" << endl; - } - - for (size_t i=0; i<Omega->size1; ++i) { - logdet_O+=log(gsl_matrix_get (Omega, i, i)); - } - logdet_O*=2.0; - - gsl_vector_memcpy (OiXty, Xty); - gsl_blas_dtrsv(CblasLower, CblasNoTrans, CblasNonUnit, Omega, OiXty); - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, OiXty); -#endif return logdet_O; } @@ -508,43 +445,12 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_float *OiXty) { double logdet_O=0.0; -#ifdef WITH_LAPACK lapack_float_cholesky_decomp(Omega); for (size_t i=0; i<Omega->size1; ++i) { logdet_O+=log(gsl_matrix_float_get (Omega, i, i)); } logdet_O*=2.0; lapack_float_cholesky_solve(Omega, Xty, OiXty); -#else - gsl_matrix *Omega_double=gsl_matrix_alloc (Omega->size1, Omega->size2); - double d; - for (size_t i=0; i<Omega->size1; ++i) { - for (size_t j=0; j<Omega->size2; ++j) { - d=(double)gsl_matrix_float_get (Omega, i, j); - gsl_matrix_set (Omega_double, i, j, d); - } - } - - int status = gsl_linalg_cholesky_decomp(Omega_double); - if(status == GSL_EDOM) { - cout << "## non-positive definite matrix" << endl; - } - - for (size_t i=0; i<Omega->size1; ++i) { - for (size_t j=0; j<Omega->size2; ++j) { - d=gsl_matrix_get (Omega_double, i, j); - if (j==i) {logdet_O+=log(d);} - gsl_matrix_float_set (Omega, i, j, (float)d); - } - } - logdet_O*=2.0; - - gsl_vector_float_memcpy (OiXty, Xty); - gsl_blas_strsv(CblasLower, CblasNoTrans, CblasNonUnit, Omega, OiXty); - gsl_blas_strsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, OiXty); - - gsl_matrix_free (Omega_double); -#endif return logdet_O; } diff --git a/src/param.cpp b/src/param.cpp index 63bf349..0a1d721 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011-2017 Xiang Zhou + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by diff --git a/src/param.h b/src/param.h index 18d5e36..9707790 100644 --- a/src/param.h +++ b/src/param.h @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011-2017 Xiang Zhou + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by |