From c18588b6d00650b9ce742229fdf1eca7133f58fc Mon Sep 17 00:00:00 2001 From: Peter Carbonetto Date: Thu, 4 May 2017 14:41:32 -0500 Subject: Local updates made by Xiang---shared via email on May 4, 2017, subject: gemma on expression data. --- src/gemma.cpp | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 216 insertions(+), 25 deletions(-) (limited to 'src/gemma.cpp') diff --git a/src/gemma.cpp b/src/gemma.cpp index 682835f..ca9c4aa 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -39,9 +39,12 @@ #include "vc_float.h" #include "lm_float.h" //for LM class #include "bslmm_float.h" //for BSLMM class +#include "bslmmdap_float.h" //for BSLMMDAP class +#include "ldr_float.h" //for LDR class #include "lmm_float.h" //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe #include "mvlmm_float.h" //for MVLMM class #include "prdt_float.h" //for PRDT class +#include "varcov_float.h" //for MVLMM class #include "mathfunc_float.h" //for a few functions #else #include "io.h" @@ -49,9 +52,12 @@ #include "vc.h" #include "lm.h" #include "bslmm.h" +#include "bslmmdap.h" +#include "ldr.h" #include "lmm.h" #include "mvlmm.h" #include "prdt.h" +#include "varcov.h" #include "mathfunc.h" #endif @@ -365,6 +371,8 @@ void GEMMA::PrintHelp(size_t option) cout<<" options: 1: BSLMM"<size, W->size2); gsl_vector *Uty=gsl_vector_alloc (y->size); - //read relatedness matrix G if (!(cPar.file_kin).empty()) { cPar.ReadGenotypes (UtX, G, false); @@ -2373,17 +2411,20 @@ void GEMMA::BatchRun (PARAM &cPar) CalcUtX (U, UtX); cPar.time_UtX=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - //perform BSLMM analysis - BSLMM cBslmm; - cBslmm.CopyFromParam(cPar); - time_start=clock(); - if (cPar.a_mode==12) { //ridge regression - cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null); - } else { //Run MCMC - cBslmm.MCMC(U, UtX, Uty, eval, y); + //perform BSLMM or BSLMMDAP analysis + if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { + BSLMM cBslmm; + cBslmm.CopyFromParam(cPar); + time_start=clock(); + if (cPar.a_mode==12) { //ridge regression + cBslmm.RidgeR(U, UtX, Uty, eval, cPar.l_remle_null); + } else { //Run MCMC + cBslmm.MCMC(U, UtX, Uty, eval, y); + } + cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + cBslmm.CopyToParam(cPar); + } else { } - cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - cBslmm.CopyToParam(cPar); //release all matrices and vectors gsl_matrix_free (G); @@ -2399,8 +2440,157 @@ void GEMMA::BatchRun (PARAM &cPar) } + + //BSLMM-DAP + if (cPar.a_mode==14 || cPar.a_mode==15 || cPar.a_mode==16) { + if (cPar.a_mode==14) { + gsl_vector *y=gsl_vector_alloc (cPar.ni_test); + gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); + gsl_matrix *G=gsl_matrix_alloc (y->size, y->size); + gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); + + //set covariates matrix W and phenotype vector y + //an intercept should be included in W, + cPar.CopyCvtPhen (W, y, 0); + + //center y, even for case/control data + cPar.pheno_mean=CenterVector(y); + + //run bvsr if rho==1 + if (cPar.rho_min==1 && cPar.rho_max==1) { + //read genotypes X (not UtX) + cPar.ReadGenotypes (UtX, G, false); + + //perform BSLMM analysis + BSLMM cBslmm; + cBslmm.CopyFromParam(cPar); + time_start=clock(); + cBslmm.MCMC(UtX, y); + cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + cBslmm.CopyToParam(cPar); + //else, if rho!=1 + } else { + gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); + gsl_vector *eval=gsl_vector_alloc (y->size); + gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2); + gsl_vector *Uty=gsl_vector_alloc (y->size); + + //read relatedness matrix G + if (!(cPar.file_kin).empty()) { + cPar.ReadGenotypes (UtX, G, false); + + //read relatedness matrix G + ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); + if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; i++) { + if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} + cPar.trace_G+=gsl_vector_get (eval, i); + } + cPar.trace_G/=(double)eval->size; + cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + + //calculate UtW and Uty + CalcUtX (U, W, UtW); + CalcUtX (U, y, Uty); + + //calculate REMLE/MLE estimate and pve + CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); + CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); + CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); + + cPar.PrintSummary(); + + //Creat and calcualte UtX, use a large memory + cout<<"Calculating UtX..."< vec_rs; + vector vec_sa2, vec_sb2, wab; + vector > > BF; + + //read hyp and bf files (functions defined in BSLMMDAP) + ReadFile_hyb (cPar.file_hyp, vec_sa2, vec_sb2, wab); + ReadFile_bf (cPar.file_bf, vec_rs, BF); + + cPar.ns_test=vec_rs.size(); + if (wab.size()!=BF[0][0].size()) {cout<<"error! hyp and bf files dimension do not match"<