diff options
author | Peter Carbonetto | 2017-06-04 12:06:36 -0500 |
---|---|---|
committer | Peter Carbonetto | 2017-06-04 12:06:36 -0500 |
commit | c1132606169875be6d07b54b30e8ae9446341bc2 (patch) | |
tree | 13019a8101d2278ab1a928481979cca9c7ee6009 /src | |
parent | 079d7deb888936fe174746d1efd7cd7ed6a511dd (diff) | |
download | pangemma-c1132606169875be6d07b54b30e8ae9446341bc2.tar.gz |
Removed FORCE_FLOAT from prdt.h/prdt.cpp.
Diffstat (limited to 'src')
-rw-r--r-- | src/eigenlib.cpp | 94 | ||||
-rw-r--r-- | src/eigenlib.h | 9 | ||||
-rw-r--r-- | src/logistic.cpp | 61 | ||||
-rw-r--r-- | src/logistic.h | 93 | ||||
-rw-r--r-- | src/prdt.cpp | 274 | ||||
-rw-r--r-- | src/prdt.h | 23 |
6 files changed, 274 insertions, 280 deletions
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp index 14ffbf1..7ad250f 100644 --- a/src/eigenlib.cpp +++ b/src/eigenlib.cpp @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> @@ -27,19 +27,23 @@ using namespace std; using namespace Eigen; - - -//on two different clusters, compare eigen vs lapack/gsl -//dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl -//dgemv, 20x or 4x faster than gsl, -//eigen, 1x or 0.3x slower than lapack -//invert, 20x or 10x faster than lapack - -void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C) -{ - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) ); - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) ); - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) ); +// On two different clusters, compare eigen vs lapack/gsl: +// +// dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl +// dgemv, 20x or 4x faster than gsl, +// eigen, 1x or 0.3x slower than lapack +// invert, 20x or 10x faster than lapack +// +void eigenlib_dgemm (const char *TransA, const char *TransB, + 
const double alpha, const gsl_matrix *A, + const gsl_matrix *B, const double beta, + gsl_matrix *C) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > + A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) ); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > + B_mat(B->data, B->size1, B->size2, OuterStride<Dynamic>(B->tda) ); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > + C_mat(C->data, C->size1, C->size2, OuterStride<Dynamic>(C->tda) ); if (*TransA=='N' || *TransA=='n') { if (*TransB=='N' || *TransB=='n') { @@ -55,19 +59,18 @@ void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha, } } - //gsl_matrix_view C_view = gsl_matrix_view_array (C_mat.data(), C->size1, C->size2); - //gsl_matrix_memcpy (C, &C_view.matrix); - return; } - - -void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A, const gsl_vector *x, const double beta, gsl_vector *y) -{ - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) ); - Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) ); - Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) ); +void eigenlib_dgemv (const char *TransA, const double alpha, + const gsl_matrix *A, const gsl_vector *x, + const double beta, gsl_vector *y) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > + A_mat(A->data, A->size1, A->size2, OuterStride<Dynamic>(A->tda) ); + Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > + x_vec(x->data, x->size, InnerStride<Dynamic>(x->stride) ); + Map<Matrix<double, Dynamic, 1>, 0, InnerStride<Dynamic> > + y_vec(y->data, y->size, InnerStride<Dynamic>(y->stride) ); if (*TransA=='N' || *TransA=='n') { y_vec=alpha*A_mat*x_vec+beta*y_vec; @@ -78,38 
+81,35 @@ void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A return; } - - -void eigenlib_invert(gsl_matrix *A) -{ - Map<Matrix<double, Dynamic, Dynamic, RowMajor> > A_mat(A->data, A->size1, A->size2); +void eigenlib_invert(gsl_matrix *A) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor> > + A_mat(A->data, A->size1, A->size2); A_mat=A_mat.inverse(); return; } - -void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A) -{ - Map<Matrix<double, Dynamic, Dynamic, RowMajor> > A_mat(A->data, A->size1, A->size2); - Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) ); - +void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor> > + A_mat(A->data, A->size1, A->size2); + Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > + b_vec(b->data, b->size, OuterStride<Dynamic>(b->stride) ); A_mat=alpha*b_vec*b_vec.transpose()+A_mat; - return; } - -void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) -{ - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) ); - Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) ); - Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > eval_vec(eval->data, eval->size, OuterStride<Dynamic>(eval->stride) ); +void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, + gsl_vector *eval) { + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > + G_mat(G->data, G->size1, G->size2, OuterStride<Dynamic>(G->tda) ); + Map<Matrix<double, Dynamic, Dynamic, RowMajor>, 0, OuterStride<Dynamic> > + U_mat(U->data, U->size1, U->size2, OuterStride<Dynamic>(U->tda) ); + Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic> > + eval_vec(eval->data, eval->size, 
OuterStride<Dynamic>(eval->stride) ); SelfAdjointEigenSolver<MatrixXd> es(G_mat); - if (es.info() != Success) abort(); - + if (es.info() != Success) + abort(); eval_vec=es.eigenvalues(); U_mat=es.eigenvectors(); - return; } diff --git a/src/eigenlib.h b/src/eigenlib.h index f869786..8cb8880 100644 --- a/src/eigenlib.h +++ b/src/eigenlib.h @@ -23,8 +23,13 @@ using namespace std; -void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C); -void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A, const gsl_vector *x, const double beta, gsl_vector *y); +void eigenlib_dgemm (const char *TransA, const char *TransB, + const double alpha, const gsl_matrix *A, + const gsl_matrix *B, const double beta, + gsl_matrix *C); +void eigenlib_dgemv (const char *TransA, const double alpha, + const gsl_matrix *A, const gsl_vector *x, + const double beta, gsl_vector *y); void eigenlib_invert(gsl_matrix *A); void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A); void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval); diff --git a/src/logistic.cpp b/src/logistic.cpp index 1b47946..002ce98 100644 --- a/src/logistic.cpp +++ b/src/logistic.cpp @@ -7,45 +7,40 @@ #include <gsl/gsl_linalg.h>
#include "logistic.h"
-// I need to bundle all the data that goes to the function to optimze together.
+// I need to bundle all the data that goes to the function to optimize
+// together.
typedef struct{
gsl_matrix_int *X;
gsl_vector_int *nlev;
gsl_vector *y;
- gsl_matrix *Xc; // continuous covariates Matrix Nobs x Kc (NULL if not used)
+ gsl_matrix *Xc; // continuous covariates matrix Nobs x Kc (NULL if not used)
double lambdaL1;
double lambdaL2;
-}fix_parm_mixed_T;
-
-
-
-
-
-
-double fLogit_mixed(gsl_vector *beta
- ,gsl_matrix_int *X
- ,gsl_vector_int *nlev
- ,gsl_matrix *Xc
- ,gsl_vector *y
- ,double lambdaL1
- ,double lambdaL2)
-{
+} fix_parm_mixed_T;
+
+double fLogit_mixed(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_matrix *Xc,
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2) {
int n = y->size;
- // int k = X->size2;
int npar = beta->size;
double total = 0;
double aux = 0;
- /* omp_set_num_threads(ompthr); */
- /* /\* Changed loop start at 1 instead of 0 to avoid regularization of beta_0*\/ */
- /* /\*#pragma omp parallel for reduction (+:total)*\/ */
+ // Changed loop start at 1 instead of 0 to avoid regularization of
+ // beta_0.
+ // #pragma omp parallel for reduction (+:total)
for(int i = 1; i < npar; ++i)
total += beta->data[i]*beta->data[i];
total = (-total*lambdaL2/2);
- /* /\*#pragma omp parallel for reduction (+:aux)*\/ */
+ // #pragma omp parallel for reduction (+:aux)
for(int i = 1; i < npar; ++i)
aux += (beta->data[i]>0 ? beta->data[i] : -beta->data[i]);
total = total-aux*lambdaL1;
- /* #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y) reduction (+:total) */
+ // #pragma omp parallel for schedule(static) shared(n,beta,X,nlev,y)
+ // reduction (+:total)
for(int i = 0; i < n; ++i) {
double Xbetai=beta->data[0];
int iParm=1;
@@ -94,11 +89,12 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params, int n = p->y->size;
int K = p->X->size2;
int Kc = p->Xc->size2;
- int npar = beta->size;
+ int npar = beta->size;
+
// Intitialize gradient out necessary?
for(int i = 0; i < npar; ++i)
out->data[i]= 0;
- /* Changed loop start at 1 instead of 0 to avoid regularization of beta 0 */
+ // Changed loop start at 1 instead of 0 to avoid regularization of beta 0.
for(int i = 1; i < npar; ++i)
out->data[i]= p->lambdaL2*beta->data[i];
for(int i = 1; i < npar; ++i)
@@ -113,7 +109,8 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params, Xbetai+=beta->data[gsl_matrix_int_get(p->X,i,k)-1+iParm];
iParm+=p->nlev->data[k]-1;
}
- // Adding the continuous
+
+ // Adding the continuous.
for(int k = 0; k < Kc; ++k)
Xbetai+= gsl_matrix_get(p->Xc,i,k)*beta->data[iParm++];
@@ -126,7 +123,8 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params, out->data[gsl_matrix_int_get(p->X,i,k)-1+iParm]+=pn;
iParm+=p->nlev->data[k]-1;
}
- // Adding the continuous
+
+ // Adding the continuous.
for(int k = 0; k < Kc; ++k) {
out->data[iParm++] += gsl_matrix_get(p->Xc,i,k)*pn;
}
@@ -134,12 +132,9 @@ wgsl_mixed_optim_df (const gsl_vector *beta, void *params, }
-
-/* The Hessian of f */
-void
-wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
- gsl_matrix *out)
-{
+// The Hessian of f.
+void wgsl_mixed_optim_hessian (const gsl_vector *beta, void *params,
+ gsl_matrix *out) {
fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
int n = p->y->size;
int K = p->X->size2;
diff --git a/src/logistic.h b/src/logistic.h index a68ee09..e951935 100644 --- a/src/logistic.h +++ b/src/logistic.h @@ -1,52 +1,54 @@ #ifndef LOGISTIC_H_ /* Include guard */
#define LOGISTIC_H_
-/* Mixed interface */
-void logistic_mixed_pred(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
- ,gsl_matrix_int *X //Matrix Nobs x K
- ,gsl_vector_int *nlev // Vector with number categories
- ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc
- ,gsl_vector *yhat //Vector of prob. predicted by the logistic
- );
+// Mixed interface.
+void logistic_mixed_pred(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc.
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with num. categories.
+ gsl_matrix *Xc, // Continuous covariates matrix
+ // Nobs x Kc
+ gsl_vector *yhat); // Vector of prob. predicted by
+ // the logistic.
-int logistic_mixed_fit(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
- ,gsl_matrix_int *X //Matrix Nobs x K
- ,gsl_vector_int *nlev // Vector with number categories
- ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc
- ,gsl_vector *y //Vector of prob. to predict
- ,double lambdaL1 // Regularization L1 0.0 if not used
- ,double lambdaL2); // Regularization L2 0.0 if not used
+int logistic_mixed_fit(gsl_vector *beta, // Vector of parameters
+ // length = 1+Sum_k(C_k-1)+Kc
+ gsl_matrix_int *X, // Matrix Nobs x K.
+ gsl_vector_int *nlev, // Vector with number categories.
+ gsl_matrix *Xc, // Continuous covariates
+ // matrix Nobs x Kc
+ gsl_vector *y, // Vector of prob. to predict.
+ double lambdaL1, // Reg. L1 0.0 if not used.
+ double lambdaL2); // Reg. L2 0.0 if not used.
-double fLogit_mixed(gsl_vector *beta
- ,gsl_matrix_int *X
- ,gsl_vector_int *nlev
- ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc
- ,gsl_vector *y
- ,double lambdaL1
- ,double lambdaL2);
+double fLogit_mixed(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_matrix *Xc, // continuous covariates matrix Nobs x Kc
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
-/* Categorical only interface */
-void logistic_cat_pred(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
- ,gsl_matrix_int *X //Matrix Nobs x K
- ,gsl_vector_int *nlev // Vector with number categories
- ,gsl_vector *yhat //Vector of prob. predicted by the logistic
- );
+// Categorical-only interface.
+void logistic_cat_pred(gsl_vector *beta, // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+ gsl_matrix_int *X, //Matrix Nobs x K
+ gsl_vector_int *nlev, // Vector with number categories
+ gsl_vector *yhat); //Vector of prob. predicted by the logistic
-int logistic_cat_fit(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
- ,gsl_matrix_int *X //Matrix Nobs x K
- ,gsl_vector_int *nlev // Vector with number categories
- ,gsl_vector *y //Vector of prob. to predict
- ,double lambdaL1 // Regularization L1 0.0 if not used
- ,double lambdaL2); // Regularization L2 0.0 if not used
-
-double fLogit_cat(gsl_vector *beta
- ,gsl_matrix_int *X
- ,gsl_vector_int *nlev
- ,gsl_vector *y
- ,double lambdaL1
- ,double lambdaL2);
+int logistic_cat_fit(gsl_vector *beta, // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
+ gsl_matrix_int *X, //Matrix Nobs x K
+ gsl_vector_int *nlev, // Vector with number categories
+ gsl_vector *y, //Vector of prob. to predict
+ double lambdaL1, // Regularization L1 0.0 if not used
+ double lambdaL2); // Regularization L2 0.0 if not used
+double fLogit_cat(gsl_vector *beta,
+ gsl_matrix_int *X,
+ gsl_vector_int *nlev,
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
/* Continuous only interface */
void logistic_cont_pred(gsl_vector *beta // Vector of parameters length = 1 + Sum_k(C_k - 1) + Kc
@@ -60,11 +62,10 @@ int logistic_cont_fit(gsl_vector *beta // Vector of parameters length = 1 + Sum ,double lambdaL1 // Regularization L1 0.0 if not used
,double lambdaL2); // Regularization L2 0.0 if not used
-double fLogit_cont(gsl_vector *beta
- ,gsl_matrix *Xc // continuous covariates Matrix Nobs x Kc
- ,gsl_vector *y
- ,double lambdaL1
- ,double lambdaL2);
-
+double fLogit_cont(gsl_vector *beta,
+ gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+ gsl_vector *y,
+ double lambdaL1,
+ double lambdaL2);
#endif // LOGISTIC_H_
diff --git a/src/prdt.cpp b/src/prdt.cpp index 2875119..db0fa14 100644 --- a/src/prdt.cpp +++ b/src/prdt.cpp @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,10 +13,8 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - - + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ #include <iostream> #include <sstream> @@ -33,28 +31,16 @@ #include "gsl/gsl_linalg.h" #include "gsl/gsl_blas.h" - #include "io.h" -#include "lapack.h" //for functions EigenDecomp +#include "lapack.h" #include "gzstream.h" - -#ifdef FORCE_FLOAT -#include "io_float.h" -#include "prdt_float.h" -#include "mathfunc_float.h" -#else #include "io.h" #include "prdt.h" #include "mathfunc.h" -#endif using namespace std; - - - -void PRDT::CopyFromParam (PARAM &cPar) -{ +void PRDT::CopyFromParam (PARAM &cPar) { a_mode=cPar.a_mode; d_pace=cPar.d_pace; @@ -81,19 +67,14 @@ void PRDT::CopyFromParam (PARAM &cPar) return; } -void PRDT::CopyToParam (PARAM &cPar) -{ +void PRDT::CopyToParam (PARAM &cPar) { cPar.ns_test=ns_test; cPar.time_eigen=time_eigen; return; } - - - -void PRDT::WriteFiles (gsl_vector *y_prdt) -{ +void PRDT::WriteFiles (gsl_vector *y_prdt) { string file_str; file_str=path_out+"/"+file_out; file_str+="."; @@ -101,7 +82,10 @@ void PRDT::WriteFiles (gsl_vector *y_prdt) file_str+=".txt"; ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;} + if (!outfile) { + cout<<"error writing file: "<<file_str.c_str()<<endl; + return; + } size_t ci_test=0; for (size_t i=0; i<indicator_idv.size(); i++) { @@ -118,15 +102,16 @@ void 
PRDT::WriteFiles (gsl_vector *y_prdt) return; } - -void PRDT::WriteFiles (gsl_matrix *Y_full) -{ +void PRDT::WriteFiles (gsl_matrix *Y_full) { string file_str; file_str=path_out+"/"+file_out; file_str+=".prdt.txt"; ofstream outfile (file_str.c_str(), ofstream::out); - if (!outfile) {cout<<"error writing file: "<<file_str.c_str()<<endl; return;} + if (!outfile) { + cout<<"error writing file: "<<file_str.c_str()<<endl; + return; + } size_t ci_test=0; for (size_t i=0; i<indicator_cvt.size(); i++) { @@ -134,7 +119,8 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) outfile<<"NA"<<endl; } else { for (size_t j=0; j<Y_full->size2; j++) { - outfile<<gsl_matrix_get (Y_full, ci_test, j)<<"\t"; + outfile << gsl_matrix_get(Y_full,ci_test,j) << + "\t"; } outfile<<endl; ci_test++; @@ -146,11 +132,7 @@ void PRDT::WriteFiles (gsl_matrix *Y_full) return; } - - - -void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) -{ +void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) { size_t ni_test=u_hat->size, ni_total=G->size1; gsl_matrix *Goo=gsl_matrix_alloc (ni_test, ni_test); @@ -190,7 +172,9 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) clock_t time_start=clock(); EigenDecomp (Goo, U, eval, 0); for (size_t i=0; i<eval->size; i++) { - if (gsl_vector_get(eval,i)<1e-10) {gsl_vector_set(eval, i, 0);} + if (gsl_vector_get(eval,i)<1e-10) { + gsl_vector_set(eval, i, 0); + } } time_eigen=(clock()-time_start)/(double(CLOCKS_PER_SEC)*60.0); @@ -198,12 +182,15 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) gsl_blas_dgemv (CblasTrans, 1.0, U, u_hat, 0.0, Utu); for (size_t i=0; i<eval->size; i++) { d=gsl_vector_get(eval, i); - if (d!=0) {d=gsl_vector_get(Utu, i)/d; gsl_vector_set(Utu, i, d);} + if (d!=0) { + d=gsl_vector_get(Utu, i)/d; + gsl_vector_set(Utu, i, d); + } } gsl_blas_dgemv (CblasNoTrans, 1.0, U, Utu, 0.0, eval); gsl_blas_dgemv (CblasNoTrans, 1.0, Gfo, eval, 1.0, y_prdt); - 
//free matrices + // Free matrices. gsl_matrix_free(Goo); gsl_matrix_free(Gfo); gsl_matrix_free(U); @@ -215,13 +202,12 @@ void PRDT::AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt) return; } - - -void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) -{ +void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) { igzstream infile (file_geno.c_str(), igzstream::in); -// ifstream infile (file_geno.c_str(), ifstream::in); - if (!infile) {cout<<"error reading genotype file:"<<file_geno<<endl; return;} + if (!infile) { + cout<<"error reading genotype file:"<<file_geno<<endl; + return; + } string line; char *ch_ptr; @@ -235,32 +221,44 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) ns_test=0; - //start reading genotypes and analyze + // Start reading genotypes and analyze. for (size_t t=0; t<ns_total; ++t) { !safeGetline(infile, line).eof(); - if (t%d_pace==0 || t==(ns_total-1)) {ProgressBar ("Reading SNPs ", t, ns_total-1);} + if (t%d_pace==0 || t==(ns_total-1)) { + ProgressBar ("Reading SNPs ", t, ns_total-1); + } ch_ptr=strtok ((char *)line.c_str(), " , \t"); rs=ch_ptr; ch_ptr=strtok (NULL, " , \t"); ch_ptr=strtok (NULL, " , \t"); - if (mapRS2est.count(rs)==0) {continue;} else {effect_size=mapRS2est[rs];} + if (mapRS2est.count(rs)==0) { + continue; + } else { + effect_size=mapRS2est[rs]; + } + + x_mean=0.0; + c_phen=0; + n_miss=0; + x_train_mean=0; + n_train_nomiss=0; - x_mean=0.0; c_phen=0; n_miss=0; x_train_mean=0; n_train_nomiss=0; gsl_vector_set_zero(x_miss); for (size_t i=0; i<indicator_idv.size(); ++i) { ch_ptr=strtok (NULL, " , \t"); if (indicator_idv[i]==1) { if (strcmp(ch_ptr, "NA")!=0) { - geno=atof(ch_ptr); + geno=atof(ch_ptr); x_train_mean+=geno; n_train_nomiss++; } } else { if (strcmp(ch_ptr, "NA")==0) { - gsl_vector_set(x_miss, c_phen, 0.0); n_miss++; + gsl_vector_set(x_miss, c_phen, 0.0); + n_miss++; } else { geno=atof(ch_ptr); @@ -272,7 +270,11 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) } } - if (x->size==n_miss) {cout<<"snp "<<rs<<" has missing 
genotype for all individuals and will be ignored."<<endl; continue;} + if (x->size==n_miss) { + cout << "snp " << rs << " has missing genotype for all " << + "individuals and will be ignored." << endl; + continue;} + x_mean/=(double)(x->size-n_miss); x_train_mean/=(double)(n_train_nomiss); @@ -303,17 +305,13 @@ void PRDT::AnalyzeBimbam (gsl_vector *y_prdt) return; } - - - - - - -void PRDT::AnalyzePlink (gsl_vector *y_prdt) -{ +void PRDT::AnalyzePlink (gsl_vector *y_prdt) { string file_bed=file_bfile+".bed"; ifstream infile (file_bed.c_str(), ios::binary); - if (!infile) {cout<<"error reading bed file:"<<file_bed<<endl; return;} + if (!infile) { + cout<<"error reading bed file:"<<file_bed<<endl; + return; + } char ch[1]; bitset<8> b; @@ -324,11 +322,11 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) gsl_vector *x=gsl_vector_alloc (y_prdt->size); - //calculate n_bit and c, the number of bit for each snp + // Calculate n_bit and c, the number of bit for each SNP. if (indicator_idv.size()%4==0) {n_bit=indicator_idv.size()/4;} else {n_bit=indicator_idv.size()/4+1; } - //print the first three majic numbers + // Print the first 3 magic numbers. for (size_t i=0; i<3; ++i) { infile.read(ch,1); b=ch[0]; @@ -337,39 +335,71 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) ns_test=0; for (vector<SNPINFO>::size_type t=0; t<snpInfo.size(); ++t) { - if (t%d_pace==0 || t==snpInfo.size()-1) {ProgressBar ("Reading SNPs ", t, snpInfo.size()-1);} - //if (indicator_snp[t]==0) {continue;} + if (t%d_pace==0 || t==snpInfo.size()-1) { + ProgressBar ("Reading SNPs ", t, snpInfo.size()-1); + } rs=snpInfo[t].rs_number; - if (mapRS2est.count(rs)==0) {continue;} else {effect_size=mapRS2est[rs];} - - infile.seekg(t*n_bit+3); //n_bit, and 3 is the number of magic numbers + if (mapRS2est.count(rs)==0) { + continue; + } else { + effect_size=mapRS2est[rs]; + } + + // n_bit, and 3 is the number of magic numbers. 
+ infile.seekg(t*n_bit+3); - //read genotypes - x_mean=0.0; n_miss=0; ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0; + // Read genotypes. + x_mean=0.0; + n_miss=0; + ci_total=0; ci_test=0; x_train_mean=0; n_train_nomiss=0; for (size_t i=0; i<n_bit; ++i) { infile.read(ch,1); b=ch[0]; - for (size_t j=0; j<4; ++j) { //minor allele homozygous: 2.0; major: 0.0; - if ((i==(n_bit-1)) && ci_total==indicator_idv.size() ) {break;} + + // Minor allele homozygous: 2.0; major: 0.0. + for (size_t j=0; j<4; ++j) { + if ((i==(n_bit-1)) && + ci_total==indicator_idv.size()) { + break; + } if (indicator_idv[ci_total]==1) { if (b[2*j]==0) { - if (b[2*j+1]==0) {x_train_mean+=2.0; n_train_nomiss++;} - else {x_train_mean+=1.0; n_train_nomiss++;} + if (b[2*j+1]==0) { + x_train_mean+=2.0; + n_train_nomiss++; + } + else { + x_train_mean+=1.0; + n_train_nomiss++; + } } else { - if (b[2*j+1]==1) {n_train_nomiss++;} + if (b[2*j+1]==1) { + n_train_nomiss++; + } else {} } } else { if (b[2*j]==0) { - if (b[2*j+1]==0) {gsl_vector_set(x, ci_test, 2); x_mean+=2.0; } - else {gsl_vector_set(x, ci_test, 1); x_mean+=1.0; } + if (b[2*j+1]==0) { + gsl_vector_set(x,ci_test,2); + x_mean+=2.0; + } + else { + gsl_vector_set(x,ci_test,1); + x_mean+=1.0; + } } else { - if (b[2*j+1]==1) {gsl_vector_set(x, ci_test, 0); } - else {gsl_vector_set(x, ci_test, -9); n_miss++; } + if (b[2*j+1]==1) { + gsl_vector_set(x,ci_test,0); + } + else { + gsl_vector_set(x,ci_test,-9); + n_miss++; + } } ci_test++; } @@ -378,7 +408,11 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) } } - if (x->size==n_miss) {cout<<"snp "<<rs<<" has missing genotype for all individuals and will be ignored."<<endl; continue;} + if (x->size==n_miss) { + cout << "snp " << rs << " has missing genotype for all " << + "individuals and will be ignored."<<endl; + continue; + } x_mean/=(double)(x->size-n_miss); x_train_mean/=(double)(n_train_nomiss); @@ -407,13 +441,10 @@ void PRDT::AnalyzePlink (gsl_vector *y_prdt) return; } - - - -//predict 
missing phenotypes using ridge regression -//Y_hat contains fixed effects -void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix *Y_full) -{ +// Predict missing phenotypes using ridge regression. +// Y_hat contains fixed effects +void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, + gsl_matrix *Y_full) { gsl_vector *y_obs=gsl_vector_alloc (np_obs); gsl_vector *y_miss=gsl_vector_alloc (np_miss); gsl_matrix *H_oo=gsl_matrix_alloc (np_obs, np_obs); @@ -422,20 +453,22 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix size_t c_obs1=0, c_obs2=0, c_miss1=0, c_miss2=0; - //obtain H_oo, H_mo + // Obtain H_oo, H_mo. c_obs1=0; c_miss1=0; for (vector<int>::size_type i1=0; i1<indicator_pheno.size(); ++i1) { if (indicator_cvt[i1]==0) {continue;} for (vector<int>::size_type j1=0; j1<n_ph; ++j1) { c_obs2=0; c_miss2=0; - for (vector<int>::size_type i2=0; i2<indicator_pheno.size(); ++i2) { + for (vector<int>::size_type i2=0; + i2<indicator_pheno.size(); ++i2) { if (indicator_cvt[i2]==0) {continue;} - for (vector<int>::size_type j2=0; j2<n_ph; j2++) { + for (vector<int>::size_type j2=0; + j2<n_ph; j2++) { if (indicator_pheno[i2][j2]==1) { - if (indicator_pheno[i1][j1]==1) { - gsl_matrix_set (H_oo, c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); + if (indicator_pheno[i1][j1]==1) { + gsl_matrix_set(H_oo,c_obs1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); } else { gsl_matrix_set (H_mo, c_miss1, c_obs2, gsl_matrix_get (H, c_obs1+c_miss1, c_obs2+c_miss2) ); } @@ -455,16 +488,16 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix } - //do LU decomposition of H_oo + // Do LU decomposition of H_oo. int sig; gsl_permutation * pmt=gsl_permutation_alloc (np_obs); LUDecomp (H_oo, pmt, &sig); -// if (mode_temp==0) { - //obtain y_obs=y_full-y_hat - //add the fixed effects part to y_miss: y_miss=y_hat + // Obtain y_obs=y_full-y_hat. 
+ // Add the fixed effects part to y_miss: y_miss=y_hat. c_obs1=0; c_miss1=0; - for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) { + for (vector<int>::size_type i=0; + i<indicator_pheno.size(); ++i) { if (indicator_cvt[i]==0) {continue;} for (vector<int>::size_type j=0; j<n_ph; ++j) { @@ -482,9 +515,10 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); - //put back predicted y_miss to Y_full + // Put back predicted y_miss to Y_full. c_miss1=0; - for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) { + for (vector<int>::size_type i=0; + i<indicator_pheno.size(); ++i) { if (indicator_cvt[i]==0) {continue;} for (vector<int>::size_type j=0; j<n_ph; ++j) { @@ -494,44 +528,8 @@ void PRDT::MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix } } } -/* - } else { - for (size_t k=0; k<mode_temp; k++) { - c_obs1=0; c_miss1=0; - for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} - - for (vector<int>::size_type j=0; j<2; ++j) { - if (indicator_pheno[i][j]==1) { - gsl_vector_set (y_obs, c_obs1, gsl_matrix_get (Y_full, i, j+k*2)-gsl_matrix_get (Y_hat, i, j) ); - c_obs1++; - } else { - gsl_vector_set (y_miss, c_miss1, gsl_matrix_get (Y_hat, i, j) ); - c_miss1++; - } - } - } - - LUSolve (H_oo, pmt, y_obs, Hiy); - - gsl_blas_dgemv (CblasNoTrans, 1.0, H_mo, Hiy, 1.0, y_miss); - - //put back predicted y_miss to Y_full - c_miss1=0; - for (vector<int>::size_type i=0; i<indicator_pheno.size(); ++i) { - if (indicator_cvt[i]==0) {continue;} - - for (vector<int>::size_type j=0; j<2; ++j) { - if (indicator_pheno[i][j]==0) { - gsl_matrix_set (Y_full, i, j+k*2, gsl_vector_get (y_miss, c_miss1) ); - c_miss1++; - } - } - } - } - } -*/ - //free matrices + + // Free matrices. 
gsl_vector_free(y_obs); gsl_vector_free(y_miss); gsl_matrix_free(H_oo); @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,31 +13,25 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __PRDT_H__ #define __PRDT_H__ - #include <vector> #include <map> #include <string.h> #include "gsl/gsl_vector.h" #include "gsl/gsl_matrix.h" - -#ifdef FORCE_FLOAT -#include "param_float.h" -#else #include "param.h" -#endif using namespace std; class PRDT { public: - // IO related parameters + // IO-related parameters. size_t a_mode; size_t d_pace; @@ -59,18 +53,19 @@ public: double time_eigen; - // Main functions + // Main functions. void CopyFromParam (PARAM &cPar); void CopyToParam (PARAM &cPar); void WriteFiles (gsl_vector *y_prdt); void WriteFiles (gsl_matrix *Y_full); - void AddBV (gsl_matrix *G, const gsl_vector *u_hat, gsl_vector *y_prdt); + void AddBV (gsl_matrix *G, const gsl_vector *u_hat, + gsl_vector *y_prdt); void AnalyzeBimbam (gsl_vector *y_prdt); void AnalyzePlink (gsl_vector *y_prdt); - void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, gsl_matrix *Y_full); + void MvnormPrdt (const gsl_matrix *Y_hat, const gsl_matrix *H, + gsl_matrix *Y_full); }; - #endif |