about summary refs log tree commit diff
path: root/src/mvlmm.cpp
diff options
context:
space:
mode:
authorxiangzhou2016-05-23 17:05:35 -0400
committerxiangzhou2016-05-23 17:05:35 -0400
commit943e970c9cbc184dcca679fbe455f48c32242cdc (patch)
tree70369d969dee3432e09e345c8106a541ac0d5a76 /src/mvlmm.cpp
parent3ab77854a52ac016b9e2b824858f5f0c695d4170 (diff)
downloadpangemma-943e970c9cbc184dcca679fbe455f48c32242cdc.tar.gz
version 0.95alpha
Diffstat (limited to 'src/mvlmm.cpp')
-rw-r--r--src/mvlmm.cpp451
1 files changed, 278 insertions, 173 deletions
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 5826a1f..7655b50 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -42,6 +42,7 @@
 
 #include "io.h"
 #include "lapack.h"
+#include "eigenlib.h"
 #include "gzstream.h"
 
 #ifdef FORCE_FLOAT
@@ -2935,12 +2936,17 @@ void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 	ifstream infile (file_bgen.c_str(), ios::binary);
 	if (!infile) {cout<<"error reading bgen file:"<<file_bgen<<endl; return;}
 
-
 	clock_t time_start=clock();
 	time_UtX=0; time_opt=0;
 
 	string line;
 
+	//create a large matrix
+	size_t msize=10000;
+	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix_set_zero(Xlarge);
+
 	//	double lambda_mle=0, lambda_remle=0, beta=0, se=0, ;
 	double logl_H0=0.0, logl_H1=0.0, p_wald=0, p_lrt=0, p_score=0;
 	double crt_a, crt_b, crt_c;
@@ -3179,6 +3185,7 @@ void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 
 
 	//start reading genotypes and analyze
+	size_t csnp=0;
 	for (size_t t=0; t<indicator_snp.size(); ++t) {
 
 
@@ -3287,87 +3294,112 @@ void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 		for (size_t i=0; i<ni_test; ++i) {
 			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
 			geno=gsl_vector_get(x, i);
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
+			//if (x_mean>1) {
+			//gsl_vector_set(x, i, 2-geno);
+			//}
 		}
 
+		/*
 		//calculate statistics
 		time_start=clock();
 		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
 		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		*/
 
-		//initial values
-		gsl_matrix_memcpy (V_g, V_g_null);
-		gsl_matrix_memcpy (V_e, V_e_null);
-		gsl_matrix_memcpy (B, B_null);
+		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
+		gsl_vector_memcpy (&Xlarge_col.vector, x);
+		csnp++;
 
-		time_start=clock();
+		if (csnp%msize==0 || t==indicator_snp.size()-1 ) {
+		  size_t l=0;
+		  if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
 
-		//3 is before 1
-		if (a_mode==3 || a_mode==4) {
-			p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-			if (p_score<p_nr && crt==1) {
-				logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-			}
-		}
+		  gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+		  gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
-		if (a_mode==2 || a_mode==4) {
-			logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		  time_start=clock();
+		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		  gsl_matrix_set_zero (Xlarge);
+
+		  for (size_t i=0; i<l; i++) {
+		    gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+		    gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
+
+		    //initial values
+		    gsl_matrix_memcpy (V_g, V_g_null);
+		    gsl_matrix_memcpy (V_e, V_e_null);
+		    gsl_matrix_memcpy (B, B_null);
+
+		    time_start=clock();
+
+		    //3 is before 1
+		    if (a_mode==3 || a_mode==4) {
+		      p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+		      if (p_score<p_nr && crt==1) {
+			logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+			p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+		      }
+		    }
+
+		    if (a_mode==2 || a_mode==4) {
+		      logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		      //calculate beta and Vbeta
+		      p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+		      if (p_lrt<p_nr) {
+			logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
 			//calculate beta and Vbeta
 			p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
 
-			if (p_lrt<p_nr) {
-				logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				//calculate beta and Vbeta
-				p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-				p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
-				if (crt==1) {
-					p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-				}
+			if (crt==1) {
+			  p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
 			}
-		}
+		      }
+		    }
 
-		if (a_mode==1 || a_mode==4) {
-			logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-			p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		    if (a_mode==1 || a_mode==4) {
+		      logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		      p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 
-			if (p_wald<p_nr) {
-				logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		      if (p_wald<p_nr) {
+			logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+			p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 
-				if (crt==1) {
-					p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-				}
+			if (crt==1) {
+			  p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
 			}
-		}
+		      }
+		    }
 
-		if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+		    //if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		    time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		//store summary data
-		//SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
-		for (size_t i=0; i<d_size; i++) {
-			v_beta[i]=gsl_vector_get (beta, i);
-		}
+		    //store summary data
+		    //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+		    for (size_t i=0; i<d_size; i++) {
+		      v_beta[i]=gsl_vector_get (beta, i);
+		    }
 
-		c=0;
-		for (size_t i=0; i<d_size; i++) {
-			for (size_t j=i; j<d_size; j++) {
-				v_Vg[c]=gsl_matrix_get (V_g, i, j);
-				v_Ve[c]=gsl_matrix_get (V_e, i, j);
-				v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-				c++;
-			}
-		}
+		    c=0;
+		    for (size_t i=0; i<d_size; i++) {
+		      for (size_t j=i; j<d_size; j++) {
+			v_Vg[c]=gsl_matrix_get (V_g, i, j);
+			v_Ve[c]=gsl_matrix_get (V_e, i, j);
+			v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+			c++;
+		      }
+		    }
 
-		MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
-		sumStat.push_back(SNPs);
-    }
+		    MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+		    sumStat.push_back(SNPs);
+		  }
+		}
+	}
 	cout<<endl;
 
 
@@ -3404,6 +3436,9 @@ void MVLMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 	gsl_matrix_free(B_null);
 	gsl_matrix_free(se_B_null);
 
+	gsl_matrix_free(Xlarge);
+	gsl_matrix_free(UtXlarge);
+
 	return;
 }
 
@@ -3430,6 +3465,12 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
 
 	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
 
+	//create a large matrix
+	size_t msize=10000;
+	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix_set_zero(Xlarge);
+
 	//large matrices for EM
 	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
 	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
@@ -3615,6 +3656,7 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
 	gsl_matrix_memcpy (B_null, B);
 
 	//start reading genotypes and analyze
+	size_t csnp=0;
 	for (size_t t=0; t<indicator_snp.size(); ++t) {
 		//if (t>=1) {break;}
 		!safeGetline(infile, line).eof();
@@ -3647,86 +3689,111 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
 		for (size_t i=0; i<ni_test; ++i) {
 			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
 			geno=gsl_vector_get(x, i);
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
+			//if (x_mean>1) {
+			//	gsl_vector_set(x, i, 2-geno);
+			//}
 		}
 
+		/*
 		//calculate statistics
 		time_start=clock();
 		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
 		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		*/
 
-		//initial values
-		gsl_matrix_memcpy (V_g, V_g_null);
-		gsl_matrix_memcpy (V_e, V_e_null);
-		gsl_matrix_memcpy (B, B_null);
+		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
+		gsl_vector_memcpy (&Xlarge_col.vector, x);
+		csnp++;
 
-		time_start=clock();
+		if (csnp%msize==0 || t==indicator_snp.size()-1 ) {
+		  size_t l=0;
+		  if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
 
-		//3 is before 1
-		if (a_mode==3 || a_mode==4) {
-			p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
-			if (p_score<p_nr && crt==1) {
-				logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-			}
-		}
+		  gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+		  gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
-		if (a_mode==2 || a_mode==4) {
-			logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		  time_start=clock();
+		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		  gsl_matrix_set_zero (Xlarge);
+
+		  for (size_t i=0; i<l; i++) {
+		    gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+		    gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
+
+		    //initial values
+		    gsl_matrix_memcpy (V_g, V_g_null);
+		    gsl_matrix_memcpy (V_e, V_e_null);
+		    gsl_matrix_memcpy (B, B_null);
+
+		    time_start=clock();
+
+		    //3 is before 1
+		    if (a_mode==3 || a_mode==4) {
+		      p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+		      if (p_score<p_nr && crt==1) {
+			logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+			p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+		      }
+		    }
+
+		    if (a_mode==2 || a_mode==4) {
+		      logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		      //calculate beta and Vbeta
+		      p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+		      if (p_lrt<p_nr) {
+			logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
 			//calculate beta and Vbeta
 			p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
 
-			if (p_lrt<p_nr) {
-				logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				//calculate beta and Vbeta
-				p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-				p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
-				if (crt==1) {
-					p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-				}
+			if (crt==1) {
+			  p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
 			}
-		}
+		      }
+		    }
 
-		if (a_mode==1 || a_mode==4) {
-			logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-			p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		    if (a_mode==1 || a_mode==4) {
+		      logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		      p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 
-			if (p_wald<p_nr) {
-				logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		      if (p_wald<p_nr) {
+			logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+			p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 
-				if (crt==1) {
-					p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-				}
+			if (crt==1) {
+			  p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
 			}
-		}
+		      }
+		    }
 
-		if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+		    //if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		    time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		//store summary data
-		//SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
-		for (size_t i=0; i<d_size; i++) {
-			v_beta[i]=gsl_vector_get (beta, i);
-		}
+		    //store summary data
+		    //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+		    for (size_t i=0; i<d_size; i++) {
+		      v_beta[i]=gsl_vector_get (beta, i);
+		    }
 
-		c=0;
-		for (size_t i=0; i<d_size; i++) {
-			for (size_t j=i; j<d_size; j++) {
-				v_Vg[c]=gsl_matrix_get (V_g, i, j);
-				v_Ve[c]=gsl_matrix_get (V_e, i, j);
-				v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-				c++;
-			}
-		}
+		    c=0;
+		    for (size_t i=0; i<d_size; i++) {
+		      for (size_t j=i; j<d_size; j++) {
+			v_Vg[c]=gsl_matrix_get (V_g, i, j);
+			v_Ve[c]=gsl_matrix_get (V_e, i, j);
+			v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+			c++;
+		      }
+		    }
 
-		MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
-		sumStat.push_back(SNPs);
+		    MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+		    sumStat.push_back(SNPs);
+		  }
+		}
     }
 	cout<<endl;
 
@@ -3764,6 +3831,9 @@ void MVLMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gs
 	gsl_matrix_free(B_null);
 	gsl_matrix_free(se_B_null);
 
+	gsl_matrix_free(Xlarge);
+	gsl_matrix_free(UtXlarge);
+
 	return;
 }
 
@@ -3795,6 +3865,12 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
 	size_t n_size=UtY->size1, d_size=UtY->size2, c_size=UtW->size2;
 	size_t dc_size=d_size*(c_size+1), v_size=d_size*(d_size+1)/2;
 
+	//create a large matrix
+	size_t msize=10000;
+	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix_set_zero(Xlarge);
+
 	//large matrices for EM
 	gsl_matrix *U_hat=gsl_matrix_alloc (d_size, n_size);
 	gsl_matrix *E_hat=gsl_matrix_alloc (d_size, n_size);
@@ -3992,6 +4068,7 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
 		b=ch[0];
 	}
 
+	size_t csnp=0;
 	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
 		if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);}
 		if (indicator_snp[t]==0) {continue;}
@@ -4030,9 +4107,9 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
 		for (size_t i=0; i<ni_test; ++i) {
 			geno=gsl_vector_get(x,i);
 			if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
+			//if (x_mean>1) {
+			//	gsl_vector_set(x, i, 2-geno);
+			//}
 		}
 
 		/*
@@ -4047,85 +4124,110 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
 		}
 		*/
 
+		/*
 		//calculate statistics
 		time_start=clock();
 		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, &X_row.vector);
 		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		*/
 
-		//initial values
-		gsl_matrix_memcpy (V_g, V_g_null);
-		gsl_matrix_memcpy (V_e, V_e_null);
-		gsl_matrix_memcpy (B, B_null);
+		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, csnp%msize);
+		gsl_vector_memcpy (&Xlarge_col.vector, x);
+		csnp++;
 
-		time_start=clock();
+		if (csnp%msize==0 || t==indicator_snp.size()-1 ) {
+		  size_t l=0;
+		  if (csnp%msize==0) {l=msize;} else {l=csnp%msize;}
 
-		//3 is before 1
-		if (a_mode==3 || a_mode==4) {
-			p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+		  gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+		  gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
-			if (p_score<p_nr && crt==1) {
-				logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
-			}
-		}
+		  time_start=clock();
+		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		  gsl_matrix_set_zero (Xlarge);
+
+		  for (size_t i=0; i<l; i++) {
+		    gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+		    gsl_vector_memcpy (&X_row.vector, &UtXlarge_col.vector);
+
+		    //initial values
+		    gsl_matrix_memcpy (V_g, V_g_null);
+		    gsl_matrix_memcpy (V_e, V_e_null);
+		    gsl_matrix_memcpy (B, B_null);
+
+		    time_start=clock();
+
+		    //3 is before 1
+		    if (a_mode==3 || a_mode==4) {
+		      p_score=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g_null, V_e_null, UltVehiY, beta, Vbeta);
+
+		      if (p_score<p_nr && crt==1) {
+			logl_H1=MphNR ('R', 1, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+			p_score=PCRT (3, d_size, p_score, crt_a, crt_b, crt_c);
+		      }
+		    }
+
+		    if (a_mode==2 || a_mode==4) {
+		      logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		      //calculate beta and Vbeta
+		      p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
+
+		      if (p_lrt<p_nr) {
+			logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
 
-		if (a_mode==2 || a_mode==4) {
-			logl_H1=MphEM ('L', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
 			//calculate beta and Vbeta
 			p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-
-			if (p_lrt<p_nr) {
-				logl_H1=MphNR ('L', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-
-				//calculate beta and Vbeta
-				p_lrt=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
-				p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_H0), (double)d_size );
-				if (crt==1) {
-					p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
-				}
+			if (crt==1) {
+			  p_lrt=PCRT (2, d_size, p_lrt, crt_a, crt_b, crt_c);
 			}
-		}
+		      }
+		    }
 
-		if (a_mode==1 || a_mode==4) {
-			logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-			p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		    if (a_mode==1 || a_mode==4) {
+		      logl_H1=MphEM ('R', em_iter/10, em_prec*10, eval, X, Y, U_hat, E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+		      p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 
-			if (p_wald<p_nr) {
-				logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-				p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
+		      if (p_wald<p_nr) {
+			logl_H1=MphNR ('R', nr_iter/10, nr_prec*10, eval, X, Y, Hi_all, xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
+			p_wald=MphCalcP (eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e, UltVehiY, beta, Vbeta);
 
-				if (crt==1) {
-					p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
-				}
+			if (crt==1) {
+			  p_wald=PCRT (1, d_size, p_wald, crt_a, crt_b, crt_c);
 			}
-		}
+		      }
+		    }
 
-		//cout<<setprecision(10)<<p_wald<<"\t"<<p_lrt<<"\t"<<p_score<<endl;
+		    //cout<<setprecision(10)<<p_wald<<"\t"<<p_lrt<<"\t"<<p_score<<endl;
 
-		if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
+		    //if (x_mean>1) {gsl_vector_scale(beta, -1.0);}
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		    time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		//store summary data
-		//SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
-		for (size_t i=0; i<d_size; i++) {
-			v_beta[i]=gsl_vector_get (beta, i);
-		}
+		    //store summary data
+		    //SUMSTAT SNPs={snpInfo[t].get_chr(), snpInfo[t].get_rs(), snpInfo[t].get_pos(), n_miss, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+		    for (size_t i=0; i<d_size; i++) {
+		      v_beta[i]=gsl_vector_get (beta, i);
+		    }
 
-		c=0;
-		for (size_t i=0; i<d_size; i++) {
-			for (size_t j=i; j<d_size; j++) {
-				v_Vg[c]=gsl_matrix_get (V_g, i, j);
-				v_Ve[c]=gsl_matrix_get (V_e, i, j);
-				v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
-				c++;
-			}
-		}
+		    c=0;
+		    for (size_t i=0; i<d_size; i++) {
+		      for (size_t j=i; j<d_size; j++) {
+			v_Vg[c]=gsl_matrix_get (V_g, i, j);
+			v_Ve[c]=gsl_matrix_get (V_e, i, j);
+			v_Vbeta[c]=gsl_matrix_get (Vbeta, i, j);
+			c++;
+		      }
+		    }
 
-		MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
-		sumStat.push_back(SNPs);
-    }
+		    MPHSUMSTAT SNPs={v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
+		    sumStat.push_back(SNPs);
+		  }
+		}
+	}
 	cout<<endl;
 
 	//cout<<"time_opt = "<<time_opt<<endl;
@@ -4162,6 +4264,9 @@ void MVLMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl
 	gsl_matrix_free(B_null);
 	gsl_matrix_free(se_B_null);
 
+	gsl_matrix_free(Xlarge);
+	gsl_matrix_free(UtXlarge);
+
 	return;
 }