From 943e970c9cbc184dcca679fbe455f48c32242cdc Mon Sep 17 00:00:00 2001 From: xiangzhou Date: Mon, 23 May 2016 17:05:35 -0400 Subject: version 0.95alpha --- src/gemma.cpp | 1840 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 1416 insertions(+), 424 deletions(-) (limited to 'src/gemma.cpp') diff --git a/src/gemma.cpp b/src/gemma.cpp index b8693a8..3b9fe29 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -39,9 +39,11 @@ #include "vc_float.h" #include "lm_float.h" //for LM class #include "bslmm_float.h" //for BSLMM class +#include "ldr_float.h" //for LDR class #include "lmm_float.h" //for LMM class, and functions CalcLambda, CalcPve, CalcVgVe #include "mvlmm_float.h" //for MVLMM class #include "prdt_float.h" //for PRDT class +#include "varcov_float.h" //for MVLMM class #include "mathfunc_float.h" //for a few functions #else #include "io.h" @@ -49,9 +51,11 @@ #include "vc.h" #include "lm.h" #include "bslmm.h" +#include "ldr.h" #include "lmm.h" #include "mvlmm.h" #include "prdt.h" +#include "varcov.h" #include "mathfunc.h" #endif @@ -60,26 +64,23 @@ using namespace std; -GEMMA::GEMMA(void): -version("0.95alpha"), date("08/08/2014"), year("2011") +GEMMA::GEMMA(void): +version("0.95alpha"), date("07/11/2015"), year("2011") {} void GEMMA::PrintHeader (void) { cout< indicator_all; size_t c_bv=0; @@ -854,13 +1136,13 @@ void GEMMA::BatchRun (PARAM &cPar) indicator_all.push_back(1); if (cPar.indicator_bv[i]==1) {gsl_vector_set(u_hat, c_bv, cPar.vec_bv[i]); c_bv++;} } - + ReadFile_kin (cPar.file_kin, indicator_all, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size1, cPar.n_cvt); + gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1); - gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); + gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2); gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2); gsl_vector *eval=gsl_vector_alloc (Y->size1); - + gsl_matrix *Y_full=gsl_matrix_alloc (cPar.ni_cvt, cPar.n_ph); gsl_matrix *W_full=gsl_matrix_alloc (Y_full->size1, cPar.n_cvt); //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, + //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); cPar.CopyCvtPhen (W_full, Y_full, 1); - - gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph); - gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1); + + gsl_matrix *Y_hat=gsl_matrix_alloc (Y_full->size1, cPar.n_ph); + gsl_matrix *G_full=gsl_matrix_alloc (Y_full->size1, Y_full->size1); gsl_matrix *H_full=gsl_matrix_alloc (Y_full->size1*Y_hat->size2, Y_full->size1*Y_hat->size2); - + //read relatedness matrix G, and matrix G_full ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; i++) { @@ -937,8 +1219,8 @@ void GEMMA::BatchRun (PARAM &cPar) cPar.trace_G+=gsl_vector_get (eval, i); } cPar.trace_G/=(double)eval->size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - + cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, Y, UtY); @@ -948,7 +1230,7 @@ void GEMMA::BatchRun (PARAM &cPar) if (cPar.n_ph==1) { gsl_vector *beta=gsl_vector_alloc (W->size2); gsl_vector *se_beta=gsl_vector_alloc (W->size2); - + double lambda, logl, vg, ve; gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); @@ -959,29 +1241,29 @@ void GEMMA::BatchRun (PARAM &cPar) cout<<"REMLE estimate for vg in the null model = "<size1; i++) { for (size_t j=0; j<=i; j++) { @@ -1004,110 +1286,250 @@ void GEMMA::BatchRun (PARAM &cPar) cPar.Ve_remle_null.push_back(gsl_matrix_get (Ve, i, j) ); } } - + //obtain Y_hat from fixed effects gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1.0, W_full, B, 0.0, Y_hat); - + //obtain H KroneckerSym(G_full, Vg, H_full); for (size_t i=0; isize1; i++) { gsl_matrix_view H_sub=gsl_matrix_submatrix (H_full, i*Ve->size1, i*Ve->size2, Ve->size1, Ve->size2); gsl_matrix_add (&H_sub.matrix, Ve); } - - //free matrices + + //free matrices gsl_matrix_free (Vg); gsl_matrix_free (Ve); gsl_matrix_free (B); gsl_matrix_free (se_B); } - + PRDT cPRDT; - + cPRDT.CopyFromParam(cPar); - + cout<<"Predicting Missing Phentypes ... "< setSnps_beta; + map mapRS2wA, mapRS2wK; + + cPar.ObtainWeight(setSnps_beta, mapRS2wK); + + time_start=clock(); + cPar.CalcS (mapRS2wA, mapRS2wK, W, A, K, &S_mat.matrix, &Svar_mat.matrix, &ns_vec.vector); + cPar.time_G=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + if (cPar.error==true) {cout<<"error! fail to calculate the S matrix. "< vec_cat, vec_ni; + vector vec_weight, vec_z2; + map mapRS2weight; + mapRS2weight.clear(); + + time_start=clock(); + ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2weight, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_total, cPar.ns_total, cPar.ns_test); + cout<<"## number of total individuals = "<size1, cPar.n_cvt); - - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, + gsl_matrix *W=gsl_matrix_alloc (Y->size1, cPar.n_cvt); + + //set covariates matrix W and phenotype matrix Y + //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); - + //Fit LM or mvLM - if (cPar.n_ph==1) { + if (cPar.n_ph==1) { LM cLm; cLm.CopyFromParam(cPar); - + gsl_vector_view Y_col=gsl_matrix_column (Y, 0); - - if (!cPar.file_gene.empty()) { + + if (!cPar.file_gene.empty()) { cLm.AnalyzeGene (W, &Y_col.vector); //y is the predictor, not the phenotype } else if (!cPar.file_bfile.empty()) { cLm.AnalyzePlink (W, &Y_col.vector); + } else if (!cPar.file_oxford.empty()) { + cLm.Analyzebgen (W, &Y_col.vector); } else { cLm.AnalyzeBimbam (W, &Y_col.vector); } - + cLm.WriteFiles(); cLm.CopyToParam(cPar); } /* - else { + else { MVLM cMvlm; - cMvlm.CopyFromParam(cPar); - + cMvlm.CopyFromParam(cPar); + if (!cPar.file_bfile.empty()) { cMvlm.AnalyzePlink (W, Y); } else { cMvlm.AnalyzeBimbam (W, Y); } - + cMvlm.WriteFiles(); cMvlm.CopyToParam(cPar); } @@ -1115,27 +1537,202 @@ void GEMMA::BatchRun (PARAM &cPar) //release all matrices and vectors gsl_matrix_free (Y); gsl_matrix_free (W); - } + } //VC estimation with one or multiple kinship matrices //REML approach only //if file_kin or file_ku/kd is provided, then a_mode is changed to 5 already, in param.cpp - //for one phenotype only; - if (cPar.a_mode==61) { + //for one phenotype only; + if (cPar.a_mode==61 || cPar.a_mode==62) { + if (!cPar.file_beta.empty() ) { + //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK + //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1 + //update indicator_snps, so that the numbers are in accordance with mapRS2wK + set setSnps_beta; + ReadFile_snps_header (cPar.file_beta, setSnps_beta); + + map mapRS2wA, mapRS2wK; + cPar.ObtainWeight(setSnps_beta, mapRS2wK); + + cPar.UpdateSNP (mapRS2wK); + + //setup matrices and vectors + gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc*2, cPar.n_vc); + gsl_matrix *Vq=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); + gsl_vector *q=gsl_vector_alloc (cPar.n_vc); + gsl_vector *s=gsl_vector_alloc (cPar.n_vc+1); + + gsl_matrix *K=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); + gsl_matrix *A=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc*cPar.ni_test); + + gsl_vector *y=gsl_vector_alloc (cPar.ni_test); + gsl_matrix *W=gsl_matrix_alloc (cPar.ni_test, cPar.n_cvt); + + gsl_matrix_set_zero (K); + gsl_matrix_set_zero (A); + + gsl_matrix_set_zero(S); + gsl_matrix_set_zero(Vq); + gsl_vector_set_zero (q); + gsl_vector_set_zero (s); + + cPar.CopyCvtPhen (W, y, 0); + + gsl_matrix_view S_mat=gsl_matrix_submatrix(S, 0, 0, cPar.n_vc, cPar.n_vc); + gsl_matrix_view Svar_mat=gsl_matrix_submatrix (S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc); + gsl_vector_view s_vec=gsl_vector_subvector(s, 0, cPar.n_vc); + + vector vec_cat, vec_ni; + vector vec_weight, vec_z2; + + //read beta, based on the mapRS2wK + ReadFile_beta (cPar.file_beta, cPar.mapRS2cat, mapRS2wK, vec_cat, vec_ni, vec_weight, vec_z2, cPar.ni_study, cPar.ns_study, cPar.ns_test); + + cout<<"Study Panel: "<size1, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1*cPar.n_vc ); - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, + //set covariates matrix W and phenotype matrix Y + //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); //read kinship matrices if (!(cPar.file_mk).empty()) { ReadFile_mk (cPar.file_mk, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); } else { ReadFile_eigenU (cPar.file_ku, cPar.error, U); if (cPar.error==true) {cout<<"error! fail to read the U file. "<size; i++) { if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} @@ -1202,7 +1799,7 @@ void GEMMA::BatchRun (PARAM &cPar) if (cPar.n_ph==1) { // if (cPar.n_vc==1) { /* - //calculate UtW and Uty + //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, Y, UtY); @@ -1228,10 +1825,10 @@ void GEMMA::BatchRun (PARAM &cPar) cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); } - + CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); cPar.PrintSummary(); - + //calculate and output residuals if (cPar.a_mode==5) { gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); @@ -1239,11 +1836,11 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_vector *u_hat=gsl_vector_alloc (Y->size1); gsl_vector *e_hat=gsl_vector_alloc (Y->size1); gsl_vector *y_hat=gsl_vector_alloc (Y->size1); - + //obtain Utu and Ute gsl_vector_memcpy (y_hat, &UtY_col.vector); gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); - + double d, u, e; for (size_t i=0; isize; i++) { d=gsl_vector_get (eval, i); @@ -1252,37 +1849,210 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_vector_set (Utu_hat, i, u); gsl_vector_set (Ute_hat, i, e); } - + //obtain u and e gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); - - //output residuals + + //output residuals cPar.WriteVector(u_hat, "residU"); cPar.WriteVector(e_hat, "residE"); - + gsl_vector_free(u_hat); gsl_vector_free(e_hat); gsl_vector_free(y_hat); - } -*/ + } +*/ // } else { gsl_vector_view Y_col=gsl_matrix_column (Y, 0); VC cVc; - cVc.CopyFromParam(cPar); - cVc.CalcVCreml (G, W, &Y_col.vector); + cVc.CopyFromParam(cPar); + if (cPar.a_mode==61) { + cVc.CalcVChe (G, W, &Y_col.vector); + } else { + cVc.CalcVCreml (cPar.noconstrain, G, W, &Y_col.vector); + } cVc.CopyToParam(cPar); - //obtain pve from sigma2 //obtain se_pve from se_sigma2 - + //} - } + } + } + + } + - + //compute confidence intervals with additional summary statistics + //we do not check the sign of z-scores here, but they have to be matched with the genotypes + if (cPar.a_mode==66 || cPar.a_mode==67) { + //read reference file first + gsl_matrix *S=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); + gsl_matrix *Svar=gsl_matrix_alloc (cPar.n_vc, cPar.n_vc); + gsl_vector *s_ref=gsl_vector_alloc (cPar.n_vc); + + gsl_matrix_set_zero(S); + gsl_matrix_set_zero(Svar); + gsl_vector_set_zero(s_ref); + + if (!cPar.file_ref.empty()) { + ReadFile_ref(cPar.file_ref, S, Svar, s_ref, cPar.ni_ref); + } else { + ReadFile_mref(cPar.file_mref, S, Svar, s_ref, cPar.ni_ref); + } + + //need to obtain a common set of SNPs between beta file and the genotype file; these are saved in mapRS2wA and mapRS2wK + //normalize the weight in mapRS2wK to have an average of one; each element of mapRS2wA is 1 + set setSnps_beta; + ReadFile_snps_header (cPar.file_beta, setSnps_beta); + + //obtain the weights for wA, which contains the SNP weights for SNPs used in the model + map mapRS2wK; + cPar.ObtainWeight(setSnps_beta, mapRS2wK); + + //set up matrices and vector + gsl_matrix *Xz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc); + gsl_matrix *XWz=gsl_matrix_alloc (cPar.ni_test, cPar.n_vc); + gsl_matrix *XtXWz=gsl_matrix_alloc (mapRS2wK.size(), cPar.n_vc*cPar.n_vc); + gsl_vector *w=gsl_vector_alloc (mapRS2wK.size()); + gsl_vector *w1=gsl_vector_alloc (mapRS2wK.size()); + gsl_vector *z=gsl_vector_alloc (mapRS2wK.size()); + gsl_vector *s_vec=gsl_vector_alloc (cPar.n_vc); + + vector vec_cat, vec_size; + vector vec_z; + + map mapRS2z, mapRS2wA; + map mapRS2A1; + string file_str; + + //update s_vec, the number of snps in each category + for (size_t i=0; i::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { + vec_size[cPar.mapRS2cat[it->first]]++; + } + + for (size_t i=0; i::const_iterator it=mapRS2wK.begin(); it!=mapRS2wK.end(); ++it) { + mapRS2wA[it->first]=1; + } + } else { + cPar.UpdateWeight (0, mapRS2wK, cPar.ni_test, s_vec, mapRS2wA); + } + + //read in z-scores based on allele 0, and save that into a vector + ReadFile_beta (cPar.file_beta, mapRS2wA, mapRS2A1, mapRS2z); + + //update snp indicator, save weights to w, save z-scores to vec_z, save category label to vec_cat + //sign of z is determined by matching alleles + cPar.UpdateSNPnZ (mapRS2wA, mapRS2A1, mapRS2z, w, z, vec_cat); + + //compute an n by k matrix of X_iWz + cout<<"Calculating Xz ... "<size2, W->size2); //B is a d by c matrix gsl_matrix *se_B=gsl_matrix_alloc (Y->size2, W->size2); gsl_matrix *G=gsl_matrix_alloc (Y->size1, Y->size1); - gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); + gsl_matrix *U=gsl_matrix_alloc (Y->size1, Y->size1); gsl_matrix *UtW=gsl_matrix_alloc (Y->size1, W->size2); gsl_matrix *UtY=gsl_matrix_alloc (Y->size1, Y->size2); gsl_vector *eval=gsl_vector_alloc (Y->size1); - - //set covariates matrix W and phenotype matrix Y - //an intercept should be included in W, + gsl_vector *env=gsl_vector_alloc (Y->size1); + gsl_vector *weight=gsl_vector_alloc (Y->size1); + + //set covariates matrix W and phenotype matrix Y + //an intercept should be included in W, cPar.CopyCvtPhen (W, Y, 0); - - //read relatedness matrix G + if (!cPar.file_gxe.empty()) {cPar.CopyGxe (env);} + + //read relatedness matrix G if (!(cPar.file_kin).empty()) { ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size1; i++) { + wi=gsl_vector_get(weight, i); + for (size_t j=i; jsize2; j++) { + wj=gsl_vector_get(weight, j); + d=gsl_matrix_get(G, i, j); + if (wi<=0 || wj<=0) {d=0;} else {d/=sqrt(wi*wj);} + gsl_matrix_set(G, i, j, d); + if (j!=i) {gsl_matrix_set(G, j, i, d);} + } + } + } + //eigen-decomposition and calculate trace_G cout<<"Start Eigen-Decomposition..."<size1; i++) { + wi=gsl_vector_get(weight, i); + if (wi<=0) {wi=0;} else {wi=sqrt(wi);} + gsl_vector_view Urow=gsl_matrix_row (U, i); + gsl_vector_scale (&Urow.vector, wi); + } + } + cPar.trace_G=0.0; for (size_t i=0; isize; i++) { if (gsl_vector_get (eval, i)<1e-10) {gsl_vector_set (eval, i, 0);} @@ -1324,14 +2123,14 @@ void GEMMA::BatchRun (PARAM &cPar) } cPar.trace_G/=(double)eval->size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); } else { ReadFile_eigenU (cPar.file_ku, cPar.error, U); if (cPar.error==true) {cout<<"error! fail to read the U file. "<size; i++) { if (gsl_vector_get(eval, i)<1e-10) {gsl_vector_set(eval, i, 0);} @@ -1339,14 +2138,29 @@ void GEMMA::BatchRun (PARAM &cPar) } cPar.trace_G/=(double)eval->size; } - + if (cPar.a_mode==31) { cPar.WriteMatrix(U, "eigenU"); cPar.WriteVector(eval, "eigenD"); - } else { - //calculate UtW and Uty + } else if (!cPar.file_gene.empty() ) { + //calculate UtW and Uty CalcUtX (U, W, UtW); - CalcUtX (U, Y, UtY); + CalcUtX (U, Y, UtY); + + LMM cLmm; + cLmm.CopyFromParam(cPar); + + gsl_vector_view Y_col=gsl_matrix_column (Y, 0); + gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); + + cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype + + cLmm.WriteFiles(); + cLmm.CopyToParam(cPar); + } else { + //calculate UtW and Uty + CalcUtX (U, W, UtW); + CalcUtX (U, Y, UtY); //calculate REMLE/MLE estimate and pve for univariate model if (cPar.n_ph==1) { @@ -1372,10 +2186,10 @@ void GEMMA::BatchRun (PARAM &cPar) cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i) ); cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i) ); } - + CalcPve (eval, UtW, &UtY_col.vector, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); cPar.PrintSummary(); - + //calculate and output residuals if (cPar.a_mode==5) { gsl_vector *Utu_hat=gsl_vector_alloc (Y->size1); @@ -1383,11 +2197,11 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_vector *u_hat=gsl_vector_alloc (Y->size1); gsl_vector *e_hat=gsl_vector_alloc (Y->size1); gsl_vector *y_hat=gsl_vector_alloc (Y->size1); - + //obtain Utu and Ute gsl_vector_memcpy (y_hat, &UtY_col.vector); gsl_blas_dgemv (CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat); - + double d, u, e; for (size_t i=0; isize; i++) { d=gsl_vector_get (eval, i); @@ -1396,81 +2210,106 @@ void GEMMA::BatchRun (PARAM &cPar) gsl_vector_set (Utu_hat, i, u); gsl_vector_set (Ute_hat, i, e); } - + //obtain u and e gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu_hat, 0.0, u_hat); gsl_blas_dgemv (CblasNoTrans, 1.0, U, Ute_hat, 0.0, e_hat); - - //output residuals + + //output residuals cPar.WriteVector(u_hat, "residU"); cPar.WriteVector(e_hat, "residE"); - + gsl_vector_free(u_hat); gsl_vector_free(e_hat); gsl_vector_free(y_hat); - } - } - + } + } + //Fit LMM or mvLMM if (cPar.a_mode==1 || cPar.a_mode==2 || cPar.a_mode==3 || cPar.a_mode==4) { - if (cPar.n_ph==1) { + if (cPar.n_ph==1) { LMM cLmm; cLmm.CopyFromParam(cPar); - + gsl_vector_view Y_col=gsl_matrix_column (Y, 0); gsl_vector_view UtY_col=gsl_matrix_column (UtY, 0); - - if (!cPar.file_gene.empty()) { - cLmm.AnalyzeGene (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); //y is the predictor, not the phenotype - } else if (!cPar.file_bfile.empty()) { - cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); - } else { - cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); - } - + + if (!cPar.file_bfile.empty()) { + if (cPar.file_gxe.empty()) { + cLmm.AnalyzePlink (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); + } else { + cLmm.AnalyzePlinkGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); + } + } + // WJA added + else if(!cPar.file_oxford.empty()) { + cLmm.Analyzebgen (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); + } + else { + if (cPar.file_gxe.empty()) { + cLmm.AnalyzeBimbam (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector); + } else { + cLmm.AnalyzeBimbamGXE (U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, env); + } + } + cLmm.WriteFiles(); cLmm.CopyToParam(cPar); - } else { + } else { MVLMM cMvlmm; - cMvlmm.CopyFromParam(cPar); - + cMvlmm.CopyFromParam(cPar); + if (!cPar.file_bfile.empty()) { - cMvlmm.AnalyzePlink (U, eval, UtW, UtY); - } else { - cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY); + if (cPar.file_gxe.empty()) { + cMvlmm.AnalyzePlink (U, eval, UtW, UtY); + } else { + cMvlmm.AnalyzePlinkGXE (U, eval, UtW, UtY, env); + } + } + else if(!cPar.file_oxford.empty()) + { + cMvlmm.Analyzebgen (U, eval, UtW, UtY); + } + else { + if (cPar.file_gxe.empty()) { + cMvlmm.AnalyzeBimbam (U, eval, UtW, UtY); + } else { + cMvlmm.AnalyzeBimbamGXE (U, eval, UtW, UtY, env); + } } - + cMvlmm.WriteFiles(); cMvlmm.CopyToParam(cPar); } } } - - + + //release all matrices and vectors gsl_matrix_free (Y); gsl_matrix_free (W); gsl_matrix_free(B); gsl_matrix_free(se_B); - gsl_matrix_free (G); + gsl_matrix_free (G); gsl_matrix_free (U); gsl_matrix_free (UtW); gsl_matrix_free (UtY); gsl_vector_free (eval); - } - - + gsl_vector_free (env); + } + + //BSLMM if (cPar.a_mode==11 || cPar.a_mode==12 || cPar.a_mode==13) { gsl_vector *y=gsl_vector_alloc (cPar.ni_test); - gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); + gsl_matrix *W=gsl_matrix_alloc (y->size, cPar.n_cvt); gsl_matrix *G=gsl_matrix_alloc (y->size, y->size); - gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); - - //set covariates matrix W and phenotype vector y - //an intercept should be included in W, + gsl_matrix *UtX=gsl_matrix_alloc (y->size, cPar.ns_test); + + //set covariates matrix W and phenotype vector y + //an intercept should be included in W, cPar.CopyCvtPhen (W, y, 0); - + //center y, even for case/control data cPar.pheno_mean=CenterVector(y); @@ -1482,32 +2321,32 @@ void GEMMA::BatchRun (PARAM &cPar) //perform BSLMM analysis BSLMM cBslmm; cBslmm.CopyFromParam(cPar); - time_start=clock(); + time_start=clock(); cBslmm.MCMC(UtX, y); cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); cBslmm.CopyToParam(cPar); //else, if rho!=1 } else { - gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); + gsl_matrix *U=gsl_matrix_alloc (y->size, y->size); gsl_vector *eval=gsl_vector_alloc (y->size); gsl_matrix *UtW=gsl_matrix_alloc (y->size, W->size2); gsl_vector *Uty=gsl_vector_alloc (y->size); - - //read relatedness matrix G - if (!(cPar.file_kin).empty()) { + + //read relatedness matrix G + if (!(cPar.file_kin).empty()) { cPar.ReadGenotypes (UtX, G, false); - + //read relatedness matrix G ReadFile_kin (cPar.file_kin, cPar.indicator_idv, cPar.mapID2num, cPar.k_mode, cPar.error, G); if (cPar.error==true) {cout<<"error! fail to read kinship/relatedness file. "<size; - cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); - - //calculate UtW and Uty + cPar.time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + + //calculate UtW and Uty CalcUtX (U, W, UtW); CalcUtX (U, y, Uty); - + //calculate REMLE/MLE estimate and pve CalcLambda ('L', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0); CalcLambda ('R', eval, UtW, Uty, cPar.l_min, cPar.l_max, cPar.n_region, cPar.l_remle_null, cPar.logl_remle_H0); CalcPve (eval, UtW, Uty, cPar.l_remle_null, cPar.trace_G, cPar.pve_null, cPar.pve_se_null); - + cPar.PrintSummary(); - + //Creat and calcualte UtX, use a large memory cout<<"Calculating UtX..."<size, cPar.n_cvt); + gsl_matrix *G=gsl_matrix_alloc (1, 1); + vector > Xt; + + //set covariates matrix W and phenotype vector y + //an intercept is included in W + cPar.CopyCvtPhen (W, y, 0); + + //read in genotype matrix X + cPar.ReadGenotypes (Xt, G, false); + + LDR cLdr; + cLdr.CopyFromParam(cPar); + time_start=clock(); + + cLdr.VB(Xt, W, y); + + cPar.time_opt=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); + cLdr.CopyToParam(cPar); + + gsl_vector_free (y); + gsl_matrix_free (W); + gsl_matrix_free (G); + } + cPar.time_total=(clock()-time_begin)/(double(CLOCKS_PER_SEC)*60.0); - + return; } -void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar) +void GEMMA::WriteLog (int argc, char ** argv, PARAM &cPar) { string file_str; file_str=cPar.path_out+"/"+cPar.file_out; file_str+=".log.txt"; - + ofstream outfile (file_str.c_str(), ofstream::out); if (!outfile) {cout<<"error writing log file: "<tm_month<<":"<tm_day":"<tm_hour<<":"<tm_min<1) { + outfile<<"## total pve = "<1) { + outfile<<"## total pve = "<1) { + outfile<<"## total pve = "<=1 && cPar.a_mode<=4) || (cPar.a_mode>=51 && cPar.a_mode<=54) ) { outfile<<"## time on optimization = "<