about summary refs log tree commit diff
path: root/src/lmm.cpp
diff options
context:
space:
mode:
authorxiangzhou2016-05-23 17:05:35 -0400
committerxiangzhou2016-05-23 17:05:35 -0400
commit943e970c9cbc184dcca679fbe455f48c32242cdc (patch)
tree70369d969dee3432e09e345c8106a541ac0d5a76 /src/lmm.cpp
parent3ab77854a52ac016b9e2b824858f5f0c695d4170 (diff)
downloadpangemma-943e970c9cbc184dcca679fbe455f48c32242cdc.tar.gz
version 0.95alpha
Diffstat (limited to 'src/lmm.cpp')
-rw-r--r--src/lmm.cpp267
1 files changed, 185 insertions, 82 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 7bcf89a..af6ff8a 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -42,6 +42,7 @@
 #include "gsl/gsl_integration.h"
 
 #include "io.h"
+#include "eigenlib.h"
 #include "lapack.h"
 #include "gzstream.h"
 
@@ -1228,6 +1229,12 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
 	gsl_vector *ab=gsl_vector_alloc (n_index);
 
+	//create a large matrix
+	size_t msize=10000;
+	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix_set_zero(Xlarge);
+
 	gsl_matrix_set_zero (Uab);
 	CalcUab (UtW, Uty, Uab);
 //	if (e_mode!=0) {
@@ -1236,6 +1243,7 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 //	}
 
 	//start reading genotypes and analyze
+	size_t c=0;
 	for (size_t t=0; t<indicator_snp.size(); ++t) {
 //		if (t>1) {break;}
 		!safeGetline(infile, line).eof();
@@ -1268,48 +1276,72 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 		for (size_t i=0; i<ni_test; ++i) {
 			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
 			geno=gsl_vector_get(x, i);
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
+			//if (x_mean>1) {
+			//	gsl_vector_set(x, i, 2-geno);
+			//}
 		}
 
-
+		/*
 		//calculate statistics
 		time_start=clock();
 		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
 		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		*/
 
-		CalcUab(UtW, Uty, Utx, Uab);
-//		if (e_mode!=0) {
-//			Calcab (W, y, x, ab);
-//		}
+		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
+		gsl_vector_memcpy (&Xlarge_col.vector, x);
+		c++;
 
-		time_start=clock();
-		FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+		if (c%msize==0 || t==indicator_snp.size()-1 ) {
+		  size_t l=0;
+		  if (c%msize==0) {l=msize;} else {l=c%msize;}
 
-		//3 is before 1
-		if (a_mode==3 || a_mode==4) {
-			CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		}
+		  gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+		  gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
-		if (a_mode==1 || a_mode==4) {
-			CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
-			CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		}
+		  time_start=clock();
+		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		if (a_mode==2 || a_mode==4) {
-			CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
-		}
+		  gsl_matrix_set_zero (Xlarge);
 
-		if (x_mean>1) {beta*=-1;}
+		  for (size_t i=0; i<l; i++) {
+		    gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+		    gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		    CalcUab(UtW, Uty, Utx, Uab);
+		    //		if (e_mode!=0) {
+		    //			Calcab (W, y, x, ab);
+		    //		}
 
-		//store summary data
-		SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
-    }
+		    time_start=clock();
+		    FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+		    //3 is before 1
+		    if (a_mode==3 || a_mode==4) {
+		      CalcRLScore (l_mle_null, param1, beta, se, p_score);
+		    }
+
+		    if (a_mode==1 || a_mode==4) {
+		      CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+		      CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+		    }
+
+		    if (a_mode==2 || a_mode==4) {
+		      CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+		    }
+
+		    //if (x_mean>1) {beta*=-1;}
+
+		    time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		    //store summary data
+		    SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+		    sumStat.push_back(SNPs);
+		  }
+		}
+	}
 	cout<<endl;
 
 	gsl_vector_free (x);
@@ -1318,6 +1350,9 @@ void LMM::AnalyzeBimbam (const gsl_matrix *U, const gsl_vector *eval, const gsl_
 	gsl_matrix_free (Uab);
 	gsl_vector_free (ab);
 
+	gsl_matrix_free (Xlarge);
+	gsl_matrix_free (UtXlarge);
+
 	infile.close();
 	infile.clear();
 
@@ -1354,6 +1389,12 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
 	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
 	gsl_vector *ab=gsl_vector_alloc (n_index);
 
+	//create a large matrix
+	size_t msize=10000;
+	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix_set_zero(Xlarge);
+
 	gsl_matrix_set_zero (Uab);
 	CalcUab (UtW, Uty, Uab);
 //	if (e_mode!=0) {
@@ -1371,7 +1412,7 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
 		b=ch[0];
 	}
 
-
+	size_t c=0;
 	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
 		if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);}
 		if (indicator_snp[t]==0) {continue;}
@@ -1406,46 +1447,71 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
 		for (size_t i=0; i<ni_test; ++i) {
 			geno=gsl_vector_get(x,i);
 			if (geno==-9) {gsl_vector_set(x, i, x_mean); geno=x_mean;}
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
+			//if (x_mean>1) {
+			//gsl_vector_set(x, i, 2-geno);
+			//}
 		}
 
+		/*
 		//calculate statistics
 		time_start=clock();
 		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
 		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		*/
 
-		CalcUab(UtW, Uty, Utx, Uab);
-//		if (e_mode!=0) {
-//			Calcab (W, y, x, ab);
-//		}
+		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
+		gsl_vector_memcpy (&Xlarge_col.vector, x);
+		c++;
 
-		time_start=clock();
-		FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+		if (c%msize==0 || t==indicator_snp.size()-1 ) {
+		  size_t l=0;
+		  if (c%msize==0) {l=msize;} else {l=c%msize;}
 
-		//3 is before 1, for beta
-		if (a_mode==3 || a_mode==4) {
-			CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		}
+		  gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+		  gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
-		if (a_mode==1 || a_mode==4) {
-			CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
-			CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		}
+		  time_start=clock();
+		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		if (a_mode==2 || a_mode==4) {
-			CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
-		}
+		  gsl_matrix_set_zero (Xlarge);
 
-		if (x_mean>1) {beta*=-1;}
+		  for (size_t i=0; i<l; i++) {
+		    gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+		    gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		    CalcUab(UtW, Uty, Utx, Uab);
+		    //		if (e_mode!=0) {
+		    //			Calcab (W, y, x, ab);
+		    //		}
 
-		//store summary data
-		SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
+		    time_start=clock();
+		    FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+		    //3 is before 1, for beta
+		    if (a_mode==3 || a_mode==4) {
+		      CalcRLScore (l_mle_null, param1, beta, se, p_score);
+		    }
+
+		    if (a_mode==1 || a_mode==4) {
+		      CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+		      CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+		    }
+
+		    if (a_mode==2 || a_mode==4) {
+		      CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+		    }
+
+		    //if (x_mean>1) {beta*=-1;}
+
+		    time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		    //store summary data
+		    SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+		    sumStat.push_back(SNPs);
+		  }
+		}
     }
 	cout<<endl;
 
@@ -1454,6 +1520,9 @@ void LMM::AnalyzePlink (const gsl_matrix *U, const gsl_vector *eval, const gsl_m
 	gsl_matrix_free (Uab);
 	gsl_vector_free (ab);
 
+	gsl_matrix_free(Xlarge);
+	gsl_matrix_free(UtXlarge);
+
 	infile.close();
 	infile.clear();
 
@@ -1487,6 +1556,12 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
 	gsl_matrix *Uab=gsl_matrix_alloc (U->size2, n_index);
 	gsl_vector *ab=gsl_vector_alloc (n_index);
 
+	//create a large matrix
+	size_t msize=10000;
+	gsl_matrix *Xlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix *UtXlarge=gsl_matrix_alloc (U->size1, msize);
+	gsl_matrix_set_zero(Xlarge);
+
 	gsl_matrix_set_zero (Uab);
 	CalcUab (UtW, Uty, Uab);
 //	if (e_mode!=0) {
@@ -1537,6 +1612,7 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
 
 
 	//start reading genotypes and analyze
+	size_t c=0;
 	for (size_t t=0; t<indicator_snp.size(); ++t)
 	{
 
@@ -1645,47 +1721,71 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
 		for (size_t i=0; i<ni_test; ++i) {
 			if (gsl_vector_get (x_miss, i)==0) {gsl_vector_set(x, i, x_mean);}
 			geno=gsl_vector_get(x, i);
-			if (x_mean>1) {
-				gsl_vector_set(x, i, 2-geno);
-			}
+			//if (x_mean>1) {
+			//gsl_vector_set(x, i, 2-geno);
+			//}
 		}
 
-
+		/*
 		//calculate statistics
 		time_start=clock();
 		gsl_blas_dgemv (CblasTrans, 1.0, U, x, 0.0, Utx);
 		time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		*/
 
-		CalcUab(UtW, Uty, Utx, Uab);
-//		if (e_mode!=0) {
-//			Calcab (W, y, x, ab);
-//		}
+		gsl_vector_view Xlarge_col=gsl_matrix_column (Xlarge, c%msize);
+		gsl_vector_memcpy (&Xlarge_col.vector, x);
+		c++;
 
-		time_start=clock();
-		FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+		if (c%msize==0 || t==indicator_snp.size()-1 ) {
+		  size_t l=0;
+		  if (c%msize==0) {l=msize;} else {l=c%msize;}
 
-		//3 is before 1
-		if (a_mode==3 || a_mode==4) {
-			CalcRLScore (l_mle_null, param1, beta, se, p_score);
-		}
+		  gsl_matrix_view Xlarge_sub=gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
+		  gsl_matrix_view UtXlarge_sub=gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
-		if (a_mode==1 || a_mode==4) {
-			CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
-			CalcRLWald (lambda_remle, param1, beta, se, p_wald);
-		}
+		  time_start=clock();
+		  eigenlib_dgemm ("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0, &UtXlarge_sub.matrix);
+		  time_UtX+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
 
-		if (a_mode==2 || a_mode==4) {
-			CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-			p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
-		}
+		  gsl_matrix_set_zero (Xlarge);
 
-		if (x_mean>1) {beta*=-1;}
+		  for (size_t i=0; i<l; i++) {
+		    gsl_vector_view UtXlarge_col=gsl_matrix_column (UtXlarge, i);
+		    gsl_vector_memcpy (Utx, &UtXlarge_col.vector);
 
-		time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+		    CalcUab(UtW, Uty, Utx, Uab);
+		    //		if (e_mode!=0) {
+		    //			Calcab (W, y, x, ab);
+		    //		}
 
-		//store summary data
-		SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
-		sumStat.push_back(SNPs);
+		    time_start=clock();
+		    FUNC_PARAM param1={false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+		    //3 is before 1
+		    if (a_mode==3 || a_mode==4) {
+		      CalcRLScore (l_mle_null, param1, beta, se, p_score);
+		    }
+
+		    if (a_mode==1 || a_mode==4) {
+		      CalcLambda ('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+		      CalcRLWald (lambda_remle, param1, beta, se, p_wald);
+		    }
+
+		    if (a_mode==2 || a_mode==4) {
+		      CalcLambda ('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+		      p_lrt=gsl_cdf_chisq_Q (2.0*(logl_H1-logl_mle_H0), 1);
+		    }
+
+		    //if (x_mean>1) {beta*=-1;}
+
+		    time_opt+=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);
+
+		    //store summary data
+		    SUMSTAT SNPs={beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score};
+		    sumStat.push_back(SNPs);
+		  }
+		}
     }
 	cout<<endl;
 
@@ -1695,6 +1795,9 @@ void LMM::Analyzebgen (const gsl_matrix *U, const gsl_vector *eval, const gsl_ma
 	gsl_matrix_free (Uab);
 	gsl_vector_free (ab);
 
+	gsl_matrix_free(Xlarge);
+	gsl_matrix_free(UtXlarge);
+
 	infile.close();
 	infile.clear();