Removed FORCE_FLOAT from prdt.h/prdt.cpp.

author: Peter Carbonetto 2017-06-04 12:06:36 -0500
committer: Peter Carbonetto 2017-06-04 12:06:36 -0500
commit: c1132606169875be6d07b54b30e8ae9446341bc2 (patch)
tree: 13019a8101d2278ab1a928481979cca9c7ee6009 /src
parent: 079d7deb888936fe174746d1efd7cd7ed6a511dd (diff)
download: pangemma-c1132606169875be6d07b54b30e8ae9446341bc2.tar.gz
6 files changed, 274 insertions, 280 deletions
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index 14ffbf1..7ad250f 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -1,6 +1,6 @@
 /*
-	Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011  Xiang Zhou
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright (C) 2011-2017, Xiang Zhou
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -13,7 +13,7 @@
     GNU General Public License for more details.
 
     You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include <iostream>
@@ -27,19 +27,23 @@
 using namespace std;
 using namespace Eigen;
 
-
-
-//on two different clusters, compare eigen vs lapack/gsl
-//dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl
-//dgemv, 20x or 4x faster than gsl, 
-//eigen, 1x or 0.3x slower than lapack
-//invert, 20x or 10x faster than lapack
-
-void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C)
-{
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  > A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  > B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) );
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  > C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) );
+// On two different clusters, compare eigen vs lapack/gsl:
+// 
+// dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl
+// dgemv, 20x or 4x faster than gsl
+// eigen, 1x or 0.3x slower than lapack
+// invert, 20x or 10x faster than lapack
+//
+void eigenlib_dgemm (const char *TransA, const char *TransB,
+		     const double alpha, const gsl_matrix *A,
+		     const gsl_matrix *B, const double beta,
+		     gsl_matrix *C) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
+    A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
+    B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) );
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
+    C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) );
 
   if (*TransA=='N' || *TransA=='n') {
     if (*TransB=='N' || *TransB=='n') {
@@ -55,19 +59,18 @@ void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha,
     }
   }
 
-  //gsl_matrix_view C_view = gsl_matrix_view_array (C_mat.data(), C->size1, C->size2);
-  //gsl_matrix_memcpy (C, &C_view.matrix);
-
   return;
 }
 
-
-
-void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A, const gsl_vector *x, const double beta, gsl_vector *y)
-{
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  > A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
-  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) );
-  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) );
+void eigenlib_dgemv (const char *TransA, const double alpha,
+		     const gsl_matrix *A, const gsl_vector *x,
+		     const double beta, gsl_vector *y) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
+    A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) );
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> >
+    x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) );
+  Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> >
+    y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) );
 
   if (*TransA=='N' || *TransA=='n') {
     y_vec=alpha*A_mat*x_vec+beta*y_vec;
@@ -78,38 +81,35 @@ void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A
   return;
 }
 
-
-
-void eigenlib_invert(gsl_matrix *A)
-{
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor> > A_mat(A->data, A->size1, A->size2);
+void eigenlib_invert(gsl_matrix *A) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor> >
+    A_mat(A->data, A->size1, A->size2);
   A_mat=A_mat.inverse();
   return;
 }
 
-
-void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A)
-{
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor> > A_mat(A->data, A->size1, A->size2);
-  Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) );
-
+void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor> >
+    A_mat(A->data, A->size1, A->size2);
+  Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> >
+    b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) );
   A_mat=alpha*b_vec*b_vec.transpose()+A_mat;
-
   return;
 }
 
-
-void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval)
-{
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  > G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) );
-  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  > U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) );
-  Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > eval_vec(eval->data, eval->size, OuterStride<Dynamic>(eval->stride) );
+void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U,
+			 gsl_vector *eval) {
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
+    G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) );
+  Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic>  >
+    U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) );
+  Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> >
+    eval_vec(eval->data, eval->size, OuterStride<Dynamic>(eval->stride) );
 
   SelfAdjointEigenSolver<MatrixXd> es(G_mat);
-  if (es.info() != Success) abort();
-
+  if (es.info() != Success)
+    abort();
   eval_vec=es.eigenvalues();
   U_mat=es.eigenvectors();
-
   return;
 }
diff --git a/src/eigenlib.h b/src/eigenlib.h
index f869786..8cb8880 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -23,8 +23,13 @@
 
 using namespace std;
 
-void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C);
-void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A, const gsl_vector *x, const double beta, gsl_vector *y);
+void eigenlib_dgemm (const char *TransA, const char *TransB,
+		     const double alpha, const gsl_matrix *A,
+		     const gsl_matrix *B, const double beta,
+		     gsl_matrix *C);
+void eigenlib_dgemv (const char *TransA, const double alpha,
+		     const gsl_matrix *A, const gsl_vector *x,
+		     const double beta, gsl_vector *y);
 void eigenlib_invert(gsl_matrix *A);
 void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A);
 void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval);
diff --git a/src/logistic.cpp b/src/logistic.cpp
index 1b47946..002ce98 100644
--- a/src/logistic.cpp
+++ b/src/logistic.cpp
@@ -7,45 +7,40 @@
 #include <gsl/gsl_linalg.h>
 #include "logistic.h"
 
-// I need to bundle all the data that goes to the function to optimze together. 
+// I need to bundle all the data that goes to the function to optimize
+// together.
 typedef struct{
   gsl_matrix_int *X;
   gsl_vector_int *nlev;
   gsl_vector *y;
-  gsl_matrix *Xc;   // continuous covariates  Matrix Nobs x Kc (NULL if not used)
+  gsl_matrix *Xc;  // continuous covariates matrix Nobs x Kc (NULL if not used)
   double lambdaL1;
   double lambdaL2;
-}fix_parm_mixed_T;
-
-
-
-
-
-
-double fLogit_mixed(gsl_vector *beta
-		    ,gsl_matrix_int *X
-		    ,gsl_vector_int *nlev
-		    ,gsl_matrix *Xc
-		    ,gsl_vector *y
-		    ,double lambdaL1
-		    ,double lambdaL2)
-{
+} fix_parm_mixed_T;
+
+double fLogit_mixed(gsl_vector *beta,
+		    gsl_matrix_int *X,
+		    gsl_vector_int *nlev,
+		    gsl_matrix *Xc,
+		    gsl_vector *y,
+		    double lambdaL1,
+		    double lambdaL2) {
   int n = y->size; 
-  //  int k = X->size2; 
   int npar = beta->size; 
   double total = 0;
   double aux = 0;
-  /*   omp_set_num_threads(ompthr); */
-  /*   /\* Changed loop start at 1 instead of 0 to avoid regularization of beta_0*\/ */
-  /*   /\*#pragma omp parallel for reduction (+:total)*\/ */
+  // Changed loop start at 1 instead of 0 to avoid regularization of
+  // beta_0.
+  // #pragma omp parallel for reduction (+:total)
   for(int i = 1; i < npar; ++i)
     total += beta->data[i]*beta->data[i];
   total = (-total*lambdaL2/2);
-  /*   /\*#pragma omp parallel for reduction (+:aux)*\/ */
+  // #pragma omp parallel for reduction (+:aux)
   for(int i = 1; i < npar; ++i)
     aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
   total = total-aux*lambdaL1;
-  /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+  // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
+  //   reduction (+:total)
   for(int i = 0; i < n; ++i) {
     double Xbetai=beta->data[0];
     int iParm=1;
@@ -94,11 +89,12 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
   int n = p->y->size; 
   int K = p->X->size2; 
   int Kc = p->Xc->size2; 
-  int npar = beta->size; 
+  int npar = beta->size;
+  
   // Intitialize gradient out necessary?
   for(int i = 0; i < npar; ++i) 
     out->data[i]= 0; 
-  /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+  // Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
   for(int i = 1; i < npar; ++i)
     out->data[i]= p->lambdaL2*beta->data[i]; 
   for(int i = 1; i < npar; ++i)
@@ -113,7 +109,8 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
 	Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
       iParm+=p->nlev->data[k]-1;
     }
-    // Adding the continuous
+    
+    // Adding the continuous.
     for(int k = 0; k < Kc; ++k) 
       Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
 
@@ -126,7 +123,8 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
 	out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
       iParm+=p->nlev->data[k]-1;
     }
-    // Adding the continuous
+    
+    // Adding the continuous.
     for(int k = 0; k < Kc; ++k) {
       out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
     }
@@ -134,12 +132,9 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params,
 
 }
 
-
-/* The Hessian of f */
-void 
-wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params, 
-			  gsl_matrix *out)
-{
+// The Hessian of f.
+void  wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params, 
+				gsl_matrix *out) {
   fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
   int n = p->y->size; 
   int K = p->X->size2; 
diff --git a/src/logistic.h b/src/logistic.h
index a68ee09..e951935 100644
--- a/src/logistic.h
+++ b/src/logistic.h
@@ -1,52 +1,54 @@
 #ifndef LOGISTIC_H_   /* Include guard */
 #define LOGISTIC_H_
 
-/* Mixed interface */
-void logistic_mixed_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
-			 ,gsl_matrix_int *X  //Matrix Nobs x K 
-			 ,gsl_vector_int *nlev // Vector with number categories
-			 ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
-			 ,gsl_vector *yhat //Vector of prob. predicted by the logistic
-			 );
+// Mixed interface.
+void logistic_mixed_pred(gsl_vector *beta,     // Vector of parameters
+					       // length = 1+Sum_k(C_k-1)+Kc.
+			 gsl_matrix_int *X,    // Matrix Nobs x K.
+			 gsl_vector_int *nlev, // Vector with num. categories.
+			 gsl_matrix *Xc,       // Continuous covariates matrix
+					       // Nobs x Kc
+			 gsl_vector *yhat);    // Vector of prob. predicted by
+					       // the logistic.
  
-int logistic_mixed_fit(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
-		       ,gsl_matrix_int *X  //Matrix Nobs x K 
-		       ,gsl_vector_int *nlev // Vector with number categories
-		       ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
-		       ,gsl_vector *y //Vector of prob. to predict
-		       ,double lambdaL1 // Regularization L1 0.0 if not used
-		       ,double lambdaL2); // Regularization L2 0.0 if not used
+int logistic_mixed_fit(gsl_vector *beta,     // Vector of parameters
+					     // length = 1+Sum_k(C_k-1)+Kc
+		       gsl_matrix_int *X,    // Matrix Nobs x K.
+		       gsl_vector_int *nlev, // Vector with number categories.
+		       gsl_matrix *Xc,       // Continuous covariates
+					     // matrix Nobs x Kc
+		       gsl_vector *y,        // Vector of prob. to predict.
+		       double lambdaL1,      // Reg. L1 0.0 if not used.
+		       double lambdaL2);     // Reg. L2 0.0 if not used.
 
-double fLogit_mixed(gsl_vector *beta
-		    ,gsl_matrix_int *X
-		    ,gsl_vector_int *nlev
-		    ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
-		    ,gsl_vector *y
-		    ,double lambdaL1
-		    ,double lambdaL2);
+double fLogit_mixed(gsl_vector *beta,
+		    gsl_matrix_int *X,
+		    gsl_vector_int *nlev,
+		    gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc 
+		    gsl_vector *y,
+		    double lambdaL1,
+		    double lambdaL2);
 
 
-/* Categorical only interface */
-void logistic_cat_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
-		       ,gsl_matrix_int *X  //Matrix Nobs x K 
-		       ,gsl_vector_int *nlev // Vector with number categories
-		       ,gsl_vector *yhat //Vector of prob. predicted by the logistic
-		       );
+// Categorical-only interface.
+void logistic_cat_pred(gsl_vector *beta,  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+		       gsl_matrix_int *X,  //Matrix Nobs x K 
+		       gsl_vector_int *nlev, // Vector with number categories
+		       gsl_vector *yhat); //Vector of prob. predicted by the logistic
  
-int logistic_cat_fit(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
-		     ,gsl_matrix_int *X  //Matrix Nobs x K 
-		     ,gsl_vector_int *nlev // Vector with number categories
-		     ,gsl_vector *y //Vector of prob. to predict
-		     ,double lambdaL1 // Regularization L1 0.0 if not used
-		     ,double lambdaL2); // Regularization L2 0.0 if not used
-
-double fLogit_cat(gsl_vector *beta
-		  ,gsl_matrix_int *X
-		  ,gsl_vector_int *nlev
-		  ,gsl_vector *y
-		  ,double lambdaL1
-		  ,double lambdaL2);
+int logistic_cat_fit(gsl_vector *beta,  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+		     gsl_matrix_int *X,  //Matrix Nobs x K 
+		     gsl_vector_int *nlev, // Vector with number categories
+		     gsl_vector *y, //Vector of prob. to predict
+		     double lambdaL1, // Regularization L1 0.0 if not used
+		     double lambdaL2); // Regularization L2 0.0 if not used
 
+double fLogit_cat(gsl_vector *beta,
+		  gsl_matrix_int *X,
+		  gsl_vector_int *nlev,
+		  gsl_vector *y,
+		  double lambdaL1,
+		  double lambdaL2);
 
 /* Continuous only interface */
 void logistic_cont_pred(gsl_vector *beta  // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
@@ -60,11 +62,10 @@ int logistic_cont_fit(gsl_vector *beta  // Vector of parameters length = 1 + Sum
 		      ,double lambdaL1 // Regularization L1 0.0 if not used
 		      ,double lambdaL2); // Regularization L2 0.0 if not used
 
-double fLogit_cont(gsl_vector *beta
-		   ,gsl_matrix *Xc   // continuous covariates  Matrix Nobs x Kc 
-		   ,gsl_vector *y
-		   ,double lambdaL1
-		   ,double lambdaL2);
-
+double fLogit_cont(gsl_vector *beta,
+		   gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+		   gsl_vector *y,
+		   double lambdaL1,
+		   double lambdaL2);
 
 #endif // LOGISTIC_H_
diff --git a/src/prdt.cpp b/src/prdt.cpp
index 2875119..db0fa14 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -1,6 +1,6 @@
 /*
  Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011  Xiang Zhou
+ Copyright (C) 2011-2017, Xiang Zhou
  
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
@@ -13,10 +13,8 @@
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
- along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
 
 #include <iostream>
 #include <sstream>
@@ -33,28 +31,16 @@
 #include "gsl/gsl_linalg.h"
 #include "gsl/gsl_blas.h"
 
-
 #include "io.h"
-#include "lapack.h"  //for functions EigenDecomp
+#include "lapack.h"
 #include "gzstream.h"
-
-#ifdef FORCE_FLOAT
-#include "io_float.h"
-#include "prdt_float.h"
-#include "mathfunc_float.h"
-#else
 #include "io.h"
 #include "prdt.h"
 #include "mathfunc.h"
-#endif
 
 using namespace std;
 
-
-
-
-void PRDT::CopyFromParam (PARAM &cPar) 
-{
+void PRDT::CopyFromParam (PARAM &cPar) {
 	a_mode=cPar.a_mode;
 	d_pace=cPar.d_pace;
 	
@@ -81,19 +67,14 @@ void PRDT::CopyFromParam (PARAM &cPar)
 	return;
 }
 
-void PRDT::CopyToParam (PARAM &cPar) 
-{
+void PRDT::CopyToParam (PARAM &cPar) {
 	cPar.ns_test=ns_test;
 	cPar.time_eigen=time_eigen;
 	
 	return;
 }               
 
-
-
-
-void PRDT::WriteFiles (gsl_vector *y_prdt) 
-{
+void PRDT::WriteFiles (gsl_vector *y_prdt) {
 	string file_str;
 	file_str=path_out+"/"+file_out;
 	file_str+=".";
@@ -101,7 +82,10 @@ void PRDT::WriteFiles (gsl_vector *y_prdt)
 	file_str+=".txt";
 	
 	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+	if (!outfile) {
+	  cout<<"error writing file: "<<file_str.c_str()<<endl;
+	  return;
+	}
 	
 	size_t ci_test=0;
 	for (size_t i=0; i<indicator_idv.size(); i++) {
@@ -118,15 +102,16 @@ void PRDT::WriteFiles (gsl_vector *y_prdt)
 	return;
 }
 
-
-void PRDT::WriteFiles (gsl_matrix *Y_full) 
-{
+void PRDT::WriteFiles (gsl_matrix *Y_full)  {
 	string file_str;
 	file_str=path_out+"/"+file_out;
 	file_str+=".prdt.txt";
 	
 	ofstream outfile (file_str.c_str(), ofstream::out);
-	if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;}
+	if (!outfile) {
+	  cout<<"error writing file: "<<file_str.c_str()<<endl;
+	  return;
+	}
 	
 	size_t ci_test=0;
 	for (size_t i=0; i<indicator_cvt.size(); i++) {
@@ -134,7 +119,8 @@ void PRDT::WriteFiles (gsl_matrix *Y_full)
 			outfile<<"NA"<<endl;
 		} else {
 			for (size_t j=0; j<Y_full->size2; j++) {
-				outfile<<gsl_matrix_get (Y_full, ci_test, j)<<"\t";
+				outfile << gsl_matrix_get(Y_full,ci_test,j) <<
+				  "\t";
 			}
 			outfile<<endl;
 			ci_test++;
@@ -146,11 +132,7 @@ void PRDT::WriteFiles (gsl_matrix *Y_full)
 	return;
 }
 
-
-
-
-void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) 
-{
+void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) {
 	size_t ni_test=u_hat->size, ni_total=G->size1;
 	
 	gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test);
@@ -190,7 +172,9 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt)
 	clock_t time_start=clock();
 	EigenDecomp (Goo, U, eval, 0);
 	for (size_t i=0; i<eval->size; i++) {
-		if (gsl_vector_get(eval,i)<1e-10) {gsl_vector_set(eval, i, 0);}
+		if (gsl_vector_get(eval,i)<1e-10) {
+		  gsl_vector_set(eval, i, 0);
+		}
 	}
 
 	time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0);	
@@ -198,12 +182,15 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt)
 	gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu);
 	for (size_t i=0; i<eval->size; i++) {
 		d=gsl_vector_get(eval, i);
-		if (d!=0) {d=gsl_vector_get(Utu, i)/d; gsl_vector_set(Utu, i, d);}
+		if (d!=0) {
+		  d=gsl_vector_get(Utu, i)/d;
+		  gsl_vector_set(Utu, i, d);
+		}
 	}
 	gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval);
 	gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt);
 	
-	//free matrices
+	// Free matrices.
 	gsl_matrix_free(Goo);
 	gsl_matrix_free(Gfo);
 	gsl_matrix_free(U);
@@ -215,13 +202,12 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt)
 	return;	
 }
 
-
-
-void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) 
-{
+void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) {
 	igzstream infile (file_geno.c_str(), igzstream::in);
-//	ifstream infile (file_geno.c_str(), ifstream::in);
-	if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;}
+	if (!infile) {
+	  cout<<"error reading genotype file:"<<file_geno<<endl;
+	  return;
+	}
 	
 	string line;
 	char *ch_ptr;
@@ -235,32 +221,44 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt)
 	
 	ns_test=0;
 
-	//start reading genotypes and analyze	
+	// Start reading genotypes and analyze.
 	for (size_t t=0; t<ns_total; ++t) {
 		!safeGetline(infile, line).eof();
-		if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs  ", t, ns_total-1);}
+		if (t%d_pace==0 || t==(ns_total-1)) {
+		  ProgressBar ("Reading SNPs  ", t, ns_total-1);
+		}
 		
 		ch_ptr=strtok ((char *)line.c_str(), " , \t");
 		rs=ch_ptr;
 		ch_ptr=strtok (NULL, " , \t");
 		ch_ptr=strtok (NULL, " , \t");		
 		
-		if (mapRS2est.count(rs)==0) {continue;} else {effect_size=mapRS2est[rs];}
+		if (mapRS2est.count(rs)==0) {
+		  continue;
+		} else {
+		  effect_size=mapRS2est[rs];
+		}
+		
+		x_mean=0.0;
+		c_phen=0;
+		n_miss=0;
+		x_train_mean=0;
+		n_train_nomiss=0;
 		
-		x_mean=0.0; c_phen=0; n_miss=0; x_train_mean=0; n_train_nomiss=0;
 		gsl_vector_set_zero(x_miss);
 
 		for (size_t i=0; i<indicator_idv.size(); ++i) {
 			ch_ptr=strtok (NULL, " , \t");
 			if (indicator_idv[i]==1) {
 				if (strcmp(ch_ptr, "NA")!=0) {
-					geno=atof(ch_ptr); 			
+					geno=atof(ch_ptr);
 					x_train_mean+=geno;
 					n_train_nomiss++;
 				}
 			} else {
 				if (strcmp(ch_ptr, "NA")==0) {
-					gsl_vector_set(x_miss, c_phen, 0.0); n_miss++;
+					gsl_vector_set(x_miss, c_phen, 0.0);
+					n_miss++;
 				} else {
 					geno=atof(ch_ptr); 	
 					
@@ -272,7 +270,11 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt)
 			}
 		}
 
-		if (x->size==n_miss) {cout<<"snp "<<rs<<" has missing genotype for all individuals and will be ignored."<<endl; continue;}
+		if (x->size==n_miss) {
+		  cout << "snp " << rs << " has missing genotype for all " <<
+		    "individuals and will be ignored." << endl;
+		  continue;}
+		
 
 		x_mean/=(double)(x->size-n_miss);
 		x_train_mean/=(double)(n_train_nomiss);
@@ -303,17 +305,13 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt)
 	return;
 }
 
-
-
-
-
-
-
-void PRDT::AnalyzePlink (gsl_vector *y_prdt) 
-{
+void PRDT::AnalyzePlink (gsl_vector *y_prdt) {
 	string file_bed=file_bfile+".bed";
 	ifstream infile (file_bed.c_str(), ios::binary);
-	if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;}
+	if (!infile) {
+	  cout<<"error reading bed file:"<<file_bed<<endl;
+	  return;
+	}
 	
 	char ch[1];
 	bitset<8> b;	
@@ -324,11 +322,11 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt)
 	
 	gsl_vector *x=gsl_vector_alloc (y_prdt->size);
 	
-	//calculate n_bit and c, the number of bit for each snp
+	// Calculate n_bit and c, the number of bit for each SNP.
 	if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;}
 	else {n_bit=indicator_idv.size()/4+1; }
 	
-	//print the first three majic numbers
+	// Print the first 3 magic numbers.
 	for (size_t i=0; i<3; ++i) {
 		infile.read(ch,1);
 		b=ch[0];
@@ -337,39 +335,71 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt)
 	ns_test=0;
 	
 	for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) {
-		if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);}
-		//if (indicator_snp[t]==0) {continue;}
+		if (t%d_pace==0 || t==snpInfo.size()-1) {
+		  ProgressBar ("Reading SNPs  ", t, snpInfo.size()-1);
+		}
 		
 		rs=snpInfo[t].rs_number;
 		
-		if (mapRS2est.count(rs)==0) {continue;} else {effect_size=mapRS2est[rs];}
-		
-		infile.seekg(t*n_bit+3);		//n_bit, and 3 is the number of magic numbers
+		if (mapRS2est.count(rs)==0) {
+		  continue;
+		} else {
+		  effect_size=mapRS2est[rs];
+		}
+
+		// Seek to SNP t: n_bit bytes per SNP, after the 3 magic-number bytes.
+		infile.seekg(t*n_bit+3);
 		
-		//read genotypes
-		x_mean=0.0;	n_miss=0; ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0;
+		// Read genotypes.
+		x_mean=0.0;
+		n_miss=0;
+		ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0;
 		for (size_t i=0; i<n_bit; ++i) {
 			infile.read(ch,1);
 			b=ch[0];
-			for (size_t j=0; j<4; ++j) {                //minor allele homozygous: 2.0; major: 0.0;
-				if ((i==(n_bit-1)) && ci_total==indicator_idv.size() ) {break;}
+
+			// Minor allele homozygous: 2.0; major: 0.0.
+			for (size_t j=0; j<4; ++j) {                
+				if ((i==(n_bit-1)) &&
+				    ci_total==indicator_idv.size()) {
+				  break;
+				}
 				if (indicator_idv[ci_total]==1) {
 					if (b[2*j]==0) {
-						if (b[2*j+1]==0) {x_train_mean+=2.0; n_train_nomiss++;}
-						else {x_train_mean+=1.0; n_train_nomiss++;}
+						if (b[2*j+1]==0) {
+						  x_train_mean+=2.0;
+						  n_train_nomiss++;
+						}
+						else {
+						  x_train_mean+=1.0;
+						  n_train_nomiss++;
+						}
 					}
 					else {
-						if (b[2*j+1]==1) {n_train_nomiss++;}                                  
+						if (b[2*j+1]==1) {
+						  n_train_nomiss++;
+						}
 						else {}
 					}
 				} else {
 					if (b[2*j]==0) {
-						if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; }
-						else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; }
+						if (b[2*j+1]==0) {
+						  gsl_vector_set(x,ci_test,2);
+						  x_mean+=2.0;
+						}
+						else {
+						  gsl_vector_set(x,ci_test,1);
+						  x_mean+=1.0;
+						}
 					}
 					else {
-						if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); }                                  
-						else {gsl_vector_set(x, ci_test, -9); n_miss++; }
+						if (b[2*j+1]==1) {
+						  gsl_vector_set(x,ci_test,0);
+						}
+						else {
+						  gsl_vector_set(x,ci_test,-9);
+						  n_miss++;
+						}
 					}
 					ci_test++;
 				}
@@ -378,7 +408,11 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt)
 			}
 		}
 		
-		if (x->size==n_miss) {cout<<"snp "<<rs<<" has missing genotype for all individuals and will be ignored."<<endl; continue;}
+		if (x->size==n_miss) {
+		  cout << "snp " << rs << " has missing genotype for all " <<
+		    "individuals and will be ignored."<<endl;
+		  continue;
+		}
 		
 		x_mean/=(double)(x->size-n_miss);
 		x_train_mean/=(double)(n_train_nomiss);
@@ -407,13 +441,10 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt)
 	return;
 }
 
-
-
-
-//predict missing phenotypes using ridge regression
-//Y_hat contains fixed effects
-void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix *Y_full) 
-{	
+// Predict missing phenotypes using ridge regression.
+// Y_hat contains fixed effects.
+void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
+		       gsl_matrix *Y_full) {	
 	gsl_vector *y_obs=gsl_vector_alloc (np_obs);
 	gsl_vector *y_miss=gsl_vector_alloc (np_miss);
 	gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs);
@@ -422,20 +453,22 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix
 	
 	size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0;
 	
-	//obtain H_oo, H_mo
+	// Obtain H_oo, H_mo.
 	c_obs1=0; c_miss1=0; 
 	for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) {
 		if (indicator_cvt[i1]==0) {continue;}
 		for (vector<int>::size_type j1=0; j1<n_ph; ++j1) {
 			
 			c_obs2=0; c_miss2=0;
-			for (vector<int>::size_type i2=0; i2<indicator_pheno.size(); ++i2) {
+			for (vector<int>::size_type i2=0;
+			     i2<indicator_pheno.size(); ++i2) {
 				if (indicator_cvt[i2]==0) {continue;}
-				for (vector<int>::size_type j2=0; j2<n_ph; j2++) {
+				for (vector<int>::size_type j2=0;
+				     j2<n_ph; j2++) {
 					
 					if (indicator_pheno[i2][j2]==1) {
-						if (indicator_pheno[i1][j1]==1) {
-							gsl_matrix_set (H_oo, c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
+					      if (indicator_pheno[i1][j1]==1) {
+						gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
 						} else {
 							gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) );
 						}
@@ -455,16 +488,16 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix
 		
 	}	
 	
-	//do LU decomposition of H_oo
+	// Do LU decomposition of H_oo.
 	int sig;
 	gsl_permutation * pmt=gsl_permutation_alloc (np_obs);
 	LUDecomp (H_oo, pmt, &sig);
 	
-//	if (mode_temp==0) {
-		//obtain y_obs=y_full-y_hat
-		//add the fixed effects part to y_miss: y_miss=y_hat
+		// Obtain y_obs=y_full-y_hat.
+		// Add the fixed effects part to y_miss: y_miss=y_hat.
 		c_obs1=0; c_miss1=0;
-		for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) {
+		for (vector<int>::size_type i=0;
+		     i<indicator_pheno.size(); ++i) {
 			if (indicator_cvt[i]==0) {continue;}
 			
 			for (vector<int>::size_type j=0; j<n_ph; ++j) {
@@ -482,9 +515,10 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix
 		
 		gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
 		
-		//put back predicted y_miss to Y_full
+		// Put back predicted y_miss to Y_full.
 		c_miss1=0;
-		for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) {
+		for (vector<int>::size_type i=0;
+		     i<indicator_pheno.size(); ++i) {
 			if (indicator_cvt[i]==0) {continue;}
 			
 			for (vector<int>::size_type j=0; j<n_ph; ++j) {
@@ -494,44 +528,8 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix
 				}
 			}
 		}
-/*
-	} else {
-		for (size_t k=0; k<mode_temp; k++) {
-			c_obs1=0; c_miss1=0;
-			for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) {
-				if (indicator_cvt[i]==0) {continue;}
-				
-				for (vector<int>::size_type j=0; j<2; ++j) {
-					if (indicator_pheno[i][j]==1) {
-						gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j+k*2)-gsl_matrix_get (Y_hat, i, j) );
-						c_obs1++;
-					} else {
-						gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) );
-						c_miss1++;
-					}
-				}
-			}	
-			
-			LUSolve (H_oo, pmt, y_obs, Hiy);
-			
-			gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss);
-			
-			//put back predicted y_miss to Y_full
-			c_miss1=0;
-			for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) {
-				if (indicator_cvt[i]==0) {continue;}
-				
-				for (vector<int>::size_type j=0; j<2; ++j) {
-					if (indicator_pheno[i][j]==0) {
-						gsl_matrix_set (Y_full, i, j+k*2, gsl_vector_get (y_miss, c_miss1) );
-						c_miss1++;
-					}
-				}
-			}
-		}
-	}
-*/
-	//free matrices
+		
+	// Free matrices.
 	gsl_vector_free(y_obs);
 	gsl_vector_free(y_miss);
 	gsl_matrix_free(H_oo);
diff --git a/src/prdt.h b/src/prdt.h
index 8af2cee..2da9fd0 100644
--- a/src/prdt.h
+++ b/src/prdt.h
@@ -1,6 +1,6 @@
 /*
-	Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011  Xiang Zhou
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright (C) 2011-2017, Xiang Zhou
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -13,31 +13,25 @@
     GNU General Public License for more details.
 
     You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #ifndef __PRDT_H__                
 #define __PRDT_H__
 
-
 #include <vector>
 #include <map>
 #include <string.h>
 #include "gsl/gsl_vector.h"
 #include "gsl/gsl_matrix.h"
-
-#ifdef FORCE_FLOAT
-#include "param_float.h"
-#else
 #include "param.h"
-#endif
 
 using namespace std;
 
 class PRDT {
 	
 public:
-	// IO related parameters
+	// IO-related parameters.
 	size_t a_mode;
 	size_t d_pace;
 	
@@ -59,18 +53,19 @@ public:
 	
 	double time_eigen;
 	
-	// Main functions
+	// Main functions.
 	void CopyFromParam (PARAM &cPar);
 	void CopyToParam (PARAM &cPar);
 	void WriteFiles (gsl_vector *y_prdt);
 	void WriteFiles (gsl_matrix *Y_full);
-	void AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt);
+	void AddBV (gsl_matrix *G, const gsl_vector *u_hat,
+		    gsl_vector *y_prdt);
 	void AnalyzeBimbam (gsl_vector *y_prdt);
 	void AnalyzePlink (gsl_vector *y_prdt);
-	void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix *Y_full);
+	void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H,
+			 gsl_matrix *Y_full);
 };
 
-
 #endif
author	Peter Carbonetto	2017-06-04 12:06:36 -0500
committer	Peter Carbonetto	2017-06-04 12:06:36 -0500
commit	c1132606169875be6d07b54b30e8ae9446341bc2 (patch)
tree	13019a8101d2278ab1a928481979cca9c7ee6009 /src
parent	079d7deb888936fe174746d1efd7cd7ed6a511dd (diff)
download	pangemma-c1132606169875be6d07b54b30e8ae9446341bc2.tar.gz