diff options
author | Peter Carbonetto | 2017-05-30 22:36:50 -0500 |
---|---|---|
committer | Peter Carbonetto | 2017-05-30 22:36:50 -0500 |
commit | 3f5d57d302188525f266ec041ebb745f6931876e (patch) | |
tree | 853d6aafccc41ba36f6d1e328c333cd6081fbd7f /src | |
parent | 5252c296a389f296e97d95e56f13b77351b32bec (diff) | |
download | pangemma-3f5d57d302188525f266ec041ebb745f6931876e.tar.gz |
Removing FORCE_FLOAT from some C++ source files.
Diffstat (limited to 'src')
-rw-r--r-- | src/eigenlib.h | 8 | ||||
-rw-r--r-- | src/lapack.cpp | 445 | ||||
-rw-r--r-- | src/lapack.h | 48 | ||||
-rw-r--r-- | src/param.cpp | 319 | ||||
-rw-r--r-- | src/param.h | 319 |
5 files changed, 677 insertions, 462 deletions
diff --git a/src/eigenlib.h b/src/eigenlib.h index e9768c2..493907c 100644 --- a/src/eigenlib.h +++ b/src/eigenlib.h @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __EIGENLIB_H__ @@ -23,12 +23,10 @@ using namespace std; - void eigenlib_dgemm (const char *TransA, const char *TransB, const double alpha, const gsl_matrix *A, const gsl_matrix *B, const double beta, gsl_matrix *C); void eigenlib_dgemv (const char *TransA, const double alpha, const gsl_matrix *A, const gsl_vector *x, const double beta, gsl_vector *y); void eigenlib_invert(gsl_matrix *A); void eigenlib_dsyr (const double alpha, const gsl_vector *b, gsl_matrix *A); void eigenlib_eigensymm (const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval); - #endif diff --git a/src/lapack.cpp b/src/lapack.cpp index d9e4149..18399a2 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> @@ -25,82 +25,129 @@ using namespace std; -extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, float *ALPHA, float *A, int *LDA, float *B, int *LDB, float *BETA, float *C, int *LDC); +extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, + float *ALPHA, float *A, int *LDA, float *B, int *LDB, + float *BETA, float *C, int *LDC); extern "C" void spotrf_(char *UPLO, int *N, float *A, int *LDA, int *INFO); -extern "C" void spotrs_(char *UPLO, int *N, int *NRHS, float *A, int *LDA, float *B, int *LDB, int *INFO); -extern "C" void ssyev_(char* JOBZ, char* UPLO, int *N, float *A, int *LDA, float *W, float *WORK, int *LWORK, int *INFO); -extern "C" void ssyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N, float *A, int *LDA, float *VL, float *VU, int *IL, int *IU, float *ABSTOL, int *M, float *W, float *Z, int *LDZ, int *ISUPPZ, float *WORK, int *LWORK, int *IWORK, int *LIWORK, int *INFO); +extern "C" void spotrs_(char *UPLO, int *N, int *NRHS, float *A, int *LDA, + float *B, int *LDB, int *INFO); +extern "C" void ssyev_(char* JOBZ, char* UPLO, int *N, float *A, int *LDA, + float *W, float *WORK, int *LWORK, int *INFO); +extern "C" void ssyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N, + float *A, int *LDA, float *VL, float *VU, int *IL, + int *IU, float *ABSTOL, int *M, float *W, float *Z, + int *LDZ, int *ISUPPZ, float *WORK, int *LWORK, + int *IWORK, int *LIWORK, int *INFO); extern "C" double sdot_(int *N, float *DX, int *INCX, float *DY, int *INCY); -extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double *ALPHA, double *A, int *LDA, double *B, int *LDB, double *BETA, double *C, int *LDC); +extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, + double *ALPHA, double *A, int *LDA, double *B, + int *LDB, double *BETA, double *C, int *LDC); extern "C" void dpotrf_(char *UPLO, int *N, double *A, int *LDA, int *INFO); -extern "C" void dpotrs_(char *UPLO, int *N, int *NRHS, double *A, int *LDA, double *B, int *LDB, int *INFO); -extern "C" void dsyev_(char* JOBZ, char* UPLO, int *N, double *A, int *LDA, double *W, double *WORK, int *LWORK, int *INFO); -extern "C" void dsyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N, double *A, int *LDA, double *VL, double *VU, int *IL, int *IU, double *ABSTOL, int *M, double *W, double *Z, int *LDZ, int *ISUPPZ, double *WORK, int *LWORK, int *IWORK, int *LIWORK, int *INFO); +extern "C" void dpotrs_(char *UPLO, int *N, int *NRHS, double *A, int *LDA, + double *B, int *LDB, int *INFO); +extern "C" void dsyev_(char* JOBZ, char* UPLO, int *N, double *A, int *LDA, + double *W, double *WORK, int *LWORK, int *INFO); +extern "C" void dsyevr_(char* JOBZ, char *RANGE, char* UPLO, int *N, + double *A, int *LDA, double *VL, double *VU, + int *IL, int *IU, double *ABSTOL, int *M, + double *W, double *Z, int *LDZ, int *ISUPPZ, + double *WORK, int *LWORK, int *IWORK, + int *LIWORK, int *INFO); extern "C" double ddot_(int *N, double *DX, int *INCX, double *DY, int *INCY); - -//cholesky decomposition, A is distroyed -void lapack_float_cholesky_decomp (gsl_matrix_float *A) -{ +// Cholesky decomposition, A is destroyed. +void lapack_float_cholesky_decomp (gsl_matrix_float *A) { int N=A->size1, LDA=A->size1, INFO; char UPLO='L'; - if (N!=(int)A->size2) {cout<<"Matrix needs to be symmetric and same dimension in lapack_cholesky_decomp."<<endl; return;} + if (N!=(int)A->size2) { + cout << "Matrix needs to be symmetric and same dimension in" << + "lapack_cholesky_decomp." << endl; + return; + } spotrf_(&UPLO, &N, A->data, &LDA, &INFO); - if (INFO!=0) {cout<<"Cholesky decomposition unsuccessful in lapack_cholesky_decomp."<<endl; return;} + if (INFO!=0) { + cout << "Cholesky decomposition unsuccessful in" << + "lapack_cholesky_decomp." << endl; + return; + } return; } -//cholesky decomposition, A is distroyed -void lapack_cholesky_decomp (gsl_matrix *A) -{ +// Cholesky decomposition, A is destroyed. +void lapack_cholesky_decomp (gsl_matrix *A) { int N=A->size1, LDA=A->size1, INFO; char UPLO='L'; - if (N!=(int)A->size2) {cout<<"Matrix needs to be symmetric and same dimension in lapack_cholesky_decomp."<<endl; return;} + if (N!=(int)A->size2) { + cout << "Matrix needs to be symmetric and same dimension in" << + "lapack_cholesky_decomp." << endl; + return; + } dpotrf_(&UPLO, &N, A->data, &LDA, &INFO); - if (INFO!=0) {cout<<"Cholesky decomposition unsuccessful in lapack_cholesky_decomp."<<endl; return;} - + if (INFO!=0) { + cout << "Cholesky decomposition unsuccessful in" << + "lapack_cholesky_decomp."<<endl; + return; + } + return; } -//cholesky solve, A is decomposed, -void lapack_float_cholesky_solve (gsl_matrix_float *A, const gsl_vector_float *b, gsl_vector_float *x) -{ +// Cholesky solve, A is decomposed. +void lapack_float_cholesky_solve (gsl_matrix_float *A, + const gsl_vector_float *b, + gsl_vector_float *x) { int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO; char UPLO='L'; - if (N!=(int)A->size2 || N!=LDB) {cout<<"Matrix needs to be symmetric and same dimension in lapack_cholesky_solve."<<endl; return;} + if (N!=(int)A->size2 || N!=LDB) { + cout << "Matrix needs to be symmetric and same dimension in" << + "lapack_cholesky_solve." << endl; + return; + } gsl_vector_float_memcpy (x, b); spotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO); - if (INFO!=0) {cout<<"Cholesky solve unsuccessful in lapack_cholesky_solve."<<endl; return;} + if (INFO!=0) { + cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << + endl; + return; + } return; } -//cholesky solve, A is decomposed, -void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, gsl_vector *x) -{ +// Cholesky solve, A is decomposed. +void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, + gsl_vector *x) { int N=A->size1, NRHS=1, LDA=A->size1, LDB=b->size, INFO; char UPLO='L'; - if (N!=(int)A->size2 || N!=LDB) {cout<<"Matrix needs to be symmetric and same dimension in lapack_cholesky_solve."<<endl; return;} + if (N!=(int)A->size2 || N!=LDB) { + cout << "Matrix needs to be symmetric and same dimension in" << + "lapack_cholesky_solve." << endl; + return; + } gsl_vector_memcpy (x, b); dpotrs_(&UPLO, &N, &NRHS, A->data, &LDA, x->data, &LDB, &INFO); - if (INFO!=0) {cout<<"Cholesky solve unsuccessful in lapack_cholesky_solve."<<endl; return;} + if (INFO!=0) { + cout << "Cholesky solve unsuccessful in lapack_cholesky_solve." << + endl; + return; + } return; } - -void lapack_sgemm (char *TransA, char *TransB, float alpha, const gsl_matrix_float *A, const gsl_matrix_float *B, float beta, gsl_matrix_float *C) -{ +void lapack_sgemm (char *TransA, char *TransB, float alpha, + const gsl_matrix_float *A, const gsl_matrix_float *B, + float beta, gsl_matrix_float *C) { int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2; if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;} @@ -111,8 +158,14 @@ void lapack_sgemm (char *TransA, char *TransB, float alpha, const gsl_matrix_flo else if (*TransB=='T' || *TransB=='t') {N=B->size1; K2=B->size2;} else {cout<<"need 'N' or 'T' in lapack_sgemm"<<endl; return;} - if (K1!=K2) {cout<<"A and B not compatible in lapack_sgemm"<<endl; return;} - if (C->size1!=(size_t)M || C->size2!=(size_t)N) {cout<<"C not compatible in lapack_sgemm"<<endl; return;} + if (K1!=K2) { + cout<<"A and B not compatible in lapack_sgemm"<<endl; + return; + } + if (C->size1!=(size_t)M || C->size2!=(size_t)N) { + cout<<"C not compatible in lapack_sgemm"<<endl; + return; + } gsl_matrix_float *A_t=gsl_matrix_float_alloc (A->size2, A->size1); gsl_matrix_float_transpose_memcpy (A_t, A); @@ -121,7 +174,8 @@ void lapack_sgemm (char *TransA, char *TransB, float alpha, const gsl_matrix_flo gsl_matrix_float *C_t=gsl_matrix_float_alloc (C->size2, C->size1); gsl_matrix_float_transpose_memcpy (C_t, C); - sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB, &beta, C_t->data, &LDC); + sgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, + B_t->data, &LDB, &beta, C_t->data, &LDC); gsl_matrix_float_transpose_memcpy (C, C_t); gsl_matrix_float_free (A_t); @@ -132,8 +186,9 @@ void lapack_sgemm (char *TransA, char *TransB, float alpha, const gsl_matrix_flo -void lapack_dgemm (char *TransA, char *TransB, double alpha, const gsl_matrix *A, const gsl_matrix *B, double beta, gsl_matrix *C) -{ +void lapack_dgemm (char *TransA, char *TransB, double alpha, + const gsl_matrix *A, const gsl_matrix *B, + double beta, gsl_matrix *C) { int M, N, K1, K2, LDA=A->size1, LDB=B->size1, LDC=C->size2; if (*TransA=='N' || *TransA=='n') {M=A->size1; K1=A->size2;} @@ -144,8 +199,14 @@ void lapack_dgemm (char *TransA, char *TransB, double alpha, const gsl_matrix *A else if (*TransB=='T' || *TransB=='t') {N=B->size1; K2=B->size2;} else {cout<<"need 'N' or 'T' in lapack_dgemm"<<endl; return;} - if (K1!=K2) {cout<<"A and B not compatible in lapack_dgemm"<<endl; return;} - if (C->size1!=(size_t)M || C->size2!=(size_t)N) {cout<<"C not compatible in lapack_dgemm"<<endl; return;} + if (K1!=K2) { + cout << "A and B not compatible in lapack_dgemm"<<endl; + return; + } + if (C->size1!=(size_t)M || C->size2!=(size_t)N) { + cout<<"C not compatible in lapack_dgemm"<<endl; + return; + } gsl_matrix *A_t=gsl_matrix_alloc (A->size2, A->size1); gsl_matrix_transpose_memcpy (A_t, A); @@ -154,7 +215,8 @@ void lapack_dgemm (char *TransA, char *TransB, double alpha, const gsl_matrix *A gsl_matrix *C_t=gsl_matrix_alloc (C->size2, C->size1); gsl_matrix_transpose_memcpy (C_t, C); - dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB, &beta, C_t->data, &LDC); + dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, + B_t->data, &LDB, &beta, C_t->data, &LDC); gsl_matrix_transpose_memcpy (C, C_t); @@ -164,58 +226,79 @@ void lapack_dgemm (char *TransA, char *TransB, double alpha, const gsl_matrix *A return; } - - -//eigen value decomposition, matrix A is destroyed, float seems to have problem with large matrices (in mac) -void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, gsl_matrix_float *evec, const size_t flag_largematrix) -{ +// Eigen value decomposition, matrix A is destroyed, float seems to +// have problem with large matrices (in mac). +void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, + gsl_matrix_float *evec, + const size_t flag_largematrix) { if (flag_largematrix==1) { int N=A->size1, LDA=A->size1, INFO, LWORK=-1; char JOBZ='V', UPLO='L'; - if (N!=(int)A->size2 || N!=(int)eval->size) {cout<<"Matrix needs to be symmetric and same dimension in lapack_eigen_symmv."<<endl; return;} - - // float temp[1]; - // ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, temp, &LWORK, &INFO); - // if (INFO!=0) {cout<<"Work space estimate unsuccessful in lapack_eigen_symmv."<<endl; return;} - // LWORK=(int)temp[0]; + if (N!=(int)A->size2 || N!=(int)eval->size) { + cout << "Matrix needs to be symmetric and same" << + "dimension in lapack_eigen_symmv."<<endl; + return; + } LWORK=3*N; float *WORK=new float [LWORK]; - ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO); - if (INFO!=0) {cout<<"Eigen decomposition unsuccessful in lapack_eigen_symmv."<<endl; return;} + ssyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, + &LWORK, &INFO); + if (INFO!=0) { + cout << "Eigen decomposition unsuccessful in" << + "lapack_eigen_symmv."<<endl; + return; + } - gsl_matrix_float_view A_sub=gsl_matrix_float_submatrix(A, 0, 0, N, N); + gsl_matrix_float_view A_sub = + gsl_matrix_float_submatrix(A, 0, 0, N, N); gsl_matrix_float_memcpy (evec, &A_sub.matrix); gsl_matrix_float_transpose (evec); delete [] WORK; } else { - int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO, LWORK=-1, LIWORK=-1; + int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO, + LWORK=-1, LIWORK=-1; char JOBZ='V', UPLO='L', RANGE='A'; float ABSTOL=1.0E-7; - //VL, VU, IL, IU are not referenced; M equals N if RANGE='A' + // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'. float VL=0.0, VU=0.0; int IL=0, IU=0, M; - if (N!=(int)A->size2 || N!=(int)eval->size) {cout<<"Matrix needs to be symmetric and same dimension in lapack_float_eigen_symmv."<<endl; return;} + if (N!=(int)A->size2 || N!=(int)eval->size) { + cout << "Matrix needs to be symmetric and same" << + "dimension in lapack_float_eigen_symmv." << endl; + return; + } int *ISUPPZ=new int [2*N]; float WORK_temp[1]; int IWORK_temp[1]; - ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp, &LIWORK, &INFO); - if (INFO!=0) {cout<<"Work space estimate unsuccessful in lapack_float_eigen_symmv."<<endl; return;} + ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, + &VU, &IL, &IU, &ABSTOL, &M, eval->data, + evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK, + IWORK_temp, &LIWORK, &INFO); + if (INFO!=0) { + cout << "Work space estimate unsuccessful in" << + "lapack_float_eigen_symmv." << endl; + return; + } LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0]; - //LWORK=26*N; - //LIWORK=10*N; float *WORK=new float [LWORK]; int *IWORK=new int [LIWORK]; - ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO); - if (INFO!=0) {cout<<"Eigen decomposition unsuccessful in lapack_float_eigen_symmv."<<endl; return;} + ssyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, + &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data, + &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO); + if (INFO!=0) { + cout << "Eigen decomposition unsuccessful in" << + "lapack_float_eigen_symmv." << endl; + return; + } gsl_matrix_float_transpose (evec); @@ -230,24 +313,28 @@ void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, gsl_ -//eigen value decomposition, matrix A is destroyed -void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, const size_t flag_largematrix) -{ +// Eigenvalue decomposition, matrix A is destroyed. +void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, + const size_t flag_largematrix) { if (flag_largematrix==1) { int N=A->size1, LDA=A->size1, INFO, LWORK=-1; char JOBZ='V', UPLO='L'; - if (N!=(int)A->size2 || N!=(int)eval->size) {cout<<"Matrix needs to be symmetric and same dimension in lapack_eigen_symmv."<<endl; return;} - - // double temp[1]; - // dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, temp, &LWORK, &INFO); - // if (INFO!=0) {cout<<"Work space estimate unsuccessful in lapack_eigen_symmv."<<endl; return;} - // LWORK=(int)temp[0]; + if (N!=(int)A->size2 || N!=(int)eval->size) { + cout << "Matrix needs to be symmetric and same" << + "dimension in lapack_eigen_symmv." << endl; + return; + } LWORK=3*N; double *WORK=new double [LWORK]; - dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, &LWORK, &INFO); - if (INFO!=0) {cout<<"Eigen decomposition unsuccessful in lapack_eigen_symmv."<<endl; return;} + dsyev_(&JOBZ, &UPLO, &N, A->data, &LDA, eval->data, WORK, + &LWORK, &INFO); + if (INFO!=0) { + cout<<"Eigen decomposition unsuccessful in" << + "lapack_eigen_symmv." << endl; + return; + } gsl_matrix_view A_sub=gsl_matrix_submatrix(A, 0, 0, N, N); gsl_matrix_memcpy (evec, &A_sub.matrix); @@ -255,32 +342,48 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, cons delete [] WORK; } else { - int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO, LWORK=-1, LIWORK=-1; + int N=A->size1, LDA=A->size1, LDZ=A->size1, INFO; + int LWORK=-1, LIWORK=-1; char JOBZ='V', UPLO='L', RANGE='A'; double ABSTOL=1.0E-7; - //VL, VU, IL, IU are not referenced; M equals N if RANGE='A' + // VL, VU, IL, IU are not referenced; M equals N if RANGE='A'. double VL=0.0, VU=0.0; int IL=0, IU=0, M; - if (N!=(int)A->size2 || N!=(int)eval->size) {cout<<"Matrix needs to be symmetric and same dimension in lapack_eigen_symmv."<<endl; return;} + if (N!=(int)A->size2 || N!=(int)eval->size) { + cout << "Matrix needs to be symmetric and same" << + "dimension in lapack_eigen_symmv." << endl; + return; + } int *ISUPPZ=new int [2*N]; double WORK_temp[1]; int IWORK_temp[1]; - dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp, &LIWORK, &INFO); - if (INFO!=0) {cout<<"Work space estimate unsuccessful in lapack_eigen_symmv."<<endl; return;} + dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, + &IL, &IU, &ABSTOL, &M, eval->data, evec->data, + &LDZ, ISUPPZ, WORK_temp, &LWORK, IWORK_temp, + &LIWORK, &INFO); + if (INFO!=0) { + cout << "Work space estimate unsuccessful in" << + "lapack_eigen_symmv." << endl; + return; + } LWORK=(int)WORK_temp[0]; LIWORK=(int)IWORK_temp[0]; - //LWORK=26*N; - //LIWORK=10*N; double *WORK=new double [LWORK]; int *IWORK=new int [LIWORK]; - dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, &IL, &IU, &ABSTOL, &M, eval->data, evec->data, &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO); - if (INFO!=0) {cout<<"Eigen decomposition unsuccessful in lapack_eigen_symmv."<<endl; return;} + dsyevr_(&JOBZ, &RANGE, &UPLO, &N, A->data, &LDA, &VL, &VU, + &IL, &IU, &ABSTOL, &M, eval->data, evec->data, + &LDZ, ISUPPZ, WORK, &LWORK, IWORK, &LIWORK, &INFO); + if (INFO!=0) { + cout << "Eigen decomposition unsuccessful in" << + "lapack_eigen_symmv." << endl; + return; + } gsl_matrix_transpose (evec); @@ -292,9 +395,9 @@ void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, cons return; } -//DO NOT set eigen values to be positive -double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix) -{ +// DO NOT set eigenvalues to be positive. +double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, + const size_t flag_largematrix) { #ifdef WITH_LAPACK lapack_eigen_symmv (G, eval, U, flag_largematrix); #else @@ -302,15 +405,8 @@ double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t gsl_eigen_symmv (G, eval, U, w); gsl_eigen_symmv_free (w); #endif - /* - for (size_t i=0; i<eval->size; ++i) { - if (gsl_vector_get (eval, i)<1e-10) { -// cout<<gsl_vector_get (eval, i)<<endl; - gsl_vector_set (eval, i, 0); - } - } - */ - //calculate track_G=mean(diag(G)) + + // Calculate track_G=mean(diag(G)). double d=0.0; for (size_t i=0; i<eval->size; ++i) { d+=gsl_vector_get(eval, i); @@ -321,55 +417,54 @@ double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t } -//DO NOT set eigen values to be positive -double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, gsl_vector_float *eval, const size_t flag_largematrix) -{ +// DO NOT set eigen values to be positive. +double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, + gsl_vector_float *eval, const size_t flag_largematrix) { #ifdef WITH_LAPACK lapack_float_eigen_symmv (G, eval, U, flag_largematrix); #else - //gsl doesn't provide float precision eigen decomposition; plus, float precision eigen decomposition in lapack may not work on OS 10.4 - //first change to double precision + + // GSL doesn't provide single precision eigen decomposition; + // plus, float precision eigen decomposition in LAPACK may not + // work on OS 10.4 first change to double precision. gsl_matrix *G_double=gsl_matrix_alloc (G->size1, G->size2); gsl_matrix *U_double=gsl_matrix_alloc (U->size1, U->size2); gsl_vector *eval_double=gsl_vector_alloc (eval->size); for (size_t i=0; i<G->size1; i++) { for (size_t j=0; j<G->size2; j++) { - gsl_matrix_set(G_double, i, j, gsl_matrix_float_get(G, i, j)); + gsl_matrix_set(G_double, i, j, + gsl_matrix_float_get(G, i, j)); } } gsl_eigen_symmv_workspace *w_space=gsl_eigen_symmv_alloc (G->size1); gsl_eigen_symmv (G_double, eval_double, U_double, w_space); gsl_eigen_symmv_free (w_space); - //change back to float precision + // Change back to float precision. for (size_t i=0; i<G->size1; i++) { for (size_t j=0; j<G->size2; j++) { - gsl_matrix_float_set(K, i, j, gsl_matrix_get(G_double, i, j)); + gsl_matrix_float_set(K, i, j, + gsl_matrix_get(G_double, i, j)); } } for (size_t i=0; i<U->size1; i++) { for (size_t j=0; j<U->size2; j++) { - gsl_matrix_float_set(U, i, j, gsl_matrix_get(U_double, i, j)); + gsl_matrix_float_set(U, i, j, + gsl_matrix_get(U_double, i, j)); } } for (size_t i=0; i<eval->size; i++) { gsl_vector_float_set(eval, i, gsl_vector_get(eval_double, i)); } - //delete double precision matrices + // Delete double-precision matrices. gsl_matrix_free (G_double); gsl_matrix_free (U_double); gsl_vector_free (eval_double); #endif - /* - for (size_t i=0; i<eval->size; ++i) { - if (gsl_vector_float_get (eval, i)<1e-10) { - gsl_vector_float_set (eval, i, 0); - } - } - */ - //calculate track_G=mean(diag(G)) - double d=0.0; + + // Calculate track_G=mean(diag(G)). + double d = 0.0; for (size_t i=0; i<eval->size; ++i) { d+=gsl_vector_float_get(eval, i); } @@ -379,8 +474,7 @@ double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, gsl_vector_float * } -double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) -{ +double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) { double logdet_O=0.0; #ifdef WITH_LAPACK @@ -394,7 +488,6 @@ double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) int status = gsl_linalg_cholesky_decomp(Omega); if(status == GSL_EDOM) { cout << "## non-positive definite matrix" << endl; - // exit(0); } for (size_t i=0; i<Omega->size1; ++i) { @@ -404,16 +497,15 @@ double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) gsl_vector_memcpy (OiXty, Xty); gsl_blas_dtrsv(CblasLower, CblasNoTrans, CblasNonUnit, Omega, OiXty); - gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, OiXty); - // gsl_linalg_cholesky_solve(XtX, Xty, iXty); + gsl_blas_dtrsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, OiXty); #endif return logdet_O; } -double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_float *OiXty) -{ +double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, + gsl_vector_float *OiXty) { double logdet_O=0.0; #ifdef WITH_LAPACK @@ -436,7 +528,6 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_ int status = gsl_linalg_cholesky_decomp(Omega_double); if(status == GSL_EDOM) { cout << "## non-positive definite matrix" << endl; - // exit(0); } for (size_t i=0; i<Omega->size1; ++i) { @@ -450,8 +541,7 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_ gsl_vector_float_memcpy (OiXty, Xty); gsl_blas_strsv(CblasLower, CblasNoTrans, CblasNonUnit, Omega, OiXty); - gsl_blas_strsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, OiXty); - // gsl_linalg_cholesky_solve(XtX, Xty, iXty); + gsl_blas_strsv(CblasUpper, CblasNoTrans, CblasNonUnit, Omega, OiXty); gsl_matrix_free (Omega_double); #endif @@ -460,129 +550,124 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_ } -//LU decomposition -void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum) -{ +// LU decomposition. +void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum) { gsl_linalg_LU_decomp (LU, p, signum); return; } -void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum) -{ +void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum) { gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<LU->size1; i++) { for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + gsl_matrix_set (LU_double, i, j, + gsl_matrix_float_get(LU, i, j)); } } - //LU decomposition + // LU decomposition. gsl_linalg_LU_decomp (LU_double, p, signum); - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<LU->size1; i++) { for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_float_set (LU, i, j, gsl_matrix_get(LU_double, i, j)); + gsl_matrix_float_set (LU, i, j, + gsl_matrix_get(LU_double, i, j)); } } - //free matrix + // Free matrix. gsl_matrix_free (LU_double); return; } -//LU invert -void LUInvert (const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *inverse) -{ +// LU invert. +void LUInvert (const gsl_matrix *LU, const gsl_permutation *p, + gsl_matrix *inverse) { gsl_linalg_LU_invert (LU, p, inverse); return; } -void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p, gsl_matrix_float *inverse) -{ +void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p, + gsl_matrix_float *inverse) { gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); - gsl_matrix *inverse_double=gsl_matrix_alloc (inverse->size1, inverse->size2); + gsl_matrix *inverse_double=gsl_matrix_alloc (inverse->size1, + inverse->size2); - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<LU->size1; i++) { for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + gsl_matrix_set (LU_double, i, j, + gsl_matrix_float_get(LU, i, j)); } } - //LU decomposition + // LU decomposition. gsl_linalg_LU_invert (LU_double, p, inverse_double); - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<inverse->size1; i++) { for (size_t j=0; j<inverse->size2; j++) { - gsl_matrix_float_set (inverse, i, j, gsl_matrix_get(inverse_double, i, j)); + gsl_matrix_float_set (inverse, i, j, + gsl_matrix_get(inverse_double, + i, j)); } } - //free matrix + // Free matrix. gsl_matrix_free (LU_double); gsl_matrix_free (inverse_double); return; } -//LU lndet -double LULndet (gsl_matrix *LU) -{ +// LU lndet. +double LULndet (gsl_matrix *LU) { double d; d=gsl_linalg_LU_lndet (LU); return d; } -double LULndet (gsl_matrix_float *LU) -{ +double LULndet (gsl_matrix_float *LU) { gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); double d; - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<LU->size1; i++) { for (size_t j=0; j<LU->size2; j++) { gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j)); } } - //LU decomposition + // LU decomposition. d=gsl_linalg_LU_lndet (LU_double); - //copy float matrix to double - /* - for (size_t i=0; i<LU->size1; i++) { - for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_float_set (LU, i, j, gsl_matrix_get(LU_double, i, j)); - } - } - */ - //free matrix + // Free matrix gsl_matrix_free (LU_double); return d; } -//LU solve -void LUSolve (const gsl_matrix *LU, const gsl_permutation *p, const gsl_vector *b, gsl_vector *x) -{ +// LU solve. +void LUSolve (const gsl_matrix *LU, const gsl_permutation *p, + const gsl_vector *b, gsl_vector *x) { gsl_linalg_LU_solve (LU, p, b, x); return; } -void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, const gsl_vector_float *b, gsl_vector_float *x) -{ +void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, + const gsl_vector_float *b, gsl_vector_float *x) { gsl_matrix *LU_double=gsl_matrix_alloc (LU->size1, LU->size2); gsl_vector *b_double=gsl_vector_alloc (b->size); gsl_vector *x_double=gsl_vector_alloc (x->size); - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<LU->size1; i++) { for (size_t j=0; j<LU->size2; j++) { - gsl_matrix_set (LU_double, i, j, gsl_matrix_float_get(LU, i, j)); + gsl_matrix_set (LU_double, i, j, + gsl_matrix_float_get(LU, i, j)); } } @@ -594,15 +679,15 @@ void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, const gsl_ve gsl_vector_set (x_double, i, gsl_vector_float_get(x, i)); } - //LU decomposition + // LU decomposition. gsl_linalg_LU_solve (LU_double, p, b_double, x_double); - //copy float matrix to double + // Copy float matrix to double. for (size_t i=0; i<x->size; i++) { gsl_vector_float_set (x, i, gsl_vector_get(x_double, i)); } - //free matrix + // Free matrix. gsl_matrix_free (LU_double); gsl_vector_free (b_double); gsl_vector_free (x_double); @@ -610,8 +695,7 @@ void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, const gsl_ve } -bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) -{ +bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) { bool flag=false; int incx=1; int incy=1; @@ -625,8 +709,7 @@ bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) } -bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) -{ +bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) { bool flag=false; int incx=1; int incy=1; diff --git a/src/lapack.h b/src/lapack.h index e5a1d37..5277b2f 100644 --- a/src/lapack.h +++ b/src/lapack.h @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __LAPACK_H__ @@ -23,31 +23,47 @@ using namespace std; - void lapack_float_cholesky_decomp (gsl_matrix_float *A); void lapack_cholesky_decomp (gsl_matrix *A); -void lapack_float_cholesky_solve (gsl_matrix_float *A, const gsl_vector_float *b, gsl_vector_float *x); +void lapack_float_cholesky_solve (gsl_matrix_float *A, + const gsl_vector_float *b, + gsl_vector_float *x); void lapack_cholesky_solve (gsl_matrix *A, const gsl_vector *b, gsl_vector *x); -void lapack_sgemm (char *TransA, char *TransB, float alpha, const gsl_matrix_float *A, const gsl_matrix_float *B, float beta, gsl_matrix_float *C); -void lapack_dgemm (char *TransA, char *TransB, double alpha, const gsl_matrix *A, const gsl_matrix *B, double beta, gsl_matrix *C); -void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, gsl_matrix_float *evec, const size_t flag_largematrix); -void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, const size_t flag_largematrix); +void lapack_sgemm (char *TransA, char *TransB, float alpha, + const gsl_matrix_float *A, const gsl_matrix_float *B, + float beta, gsl_matrix_float *C); +void lapack_dgemm (char *TransA, char *TransB, double alpha, + const gsl_matrix *A, const gsl_matrix *B, + double beta, gsl_matrix *C); +void lapack_float_eigen_symmv (gsl_matrix_float *A, gsl_vector_float *eval, + gsl_matrix_float *evec, + const size_t flag_largematrix); +void lapack_eigen_symmv (gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, + const size_t flag_largematrix); -double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix); -double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, gsl_vector_float *eval, const size_t flag_largematrix); +double EigenDecomp (gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, + const size_t flag_largematrix); +double EigenDecomp (gsl_matrix_float *G, gsl_matrix_float *U, + gsl_vector_float *eval, const size_t flag_largematrix); double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty); -double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_float *OiXty); +double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, + gsl_vector_float *OiXty); void LUDecomp (gsl_matrix *LU, gsl_permutation *p, int *signum); void LUDecomp (gsl_matrix_float *LU, gsl_permutation *p, int *signum); -void LUInvert (const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *inverse); -void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p, gsl_matrix_float *inverse); +void LUInvert (const gsl_matrix *LU, const gsl_permutation *p, + gsl_matrix *inverse); +void LUInvert (const gsl_matrix_float *LU, const gsl_permutation *p, + gsl_matrix_float *inverse); double LULndet (gsl_matrix *LU); double LULndet (gsl_matrix_float *LU); -void LUSolve (const gsl_matrix *LU, const gsl_permutation *p, const gsl_vector *b, gsl_vector *x); -void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, const gsl_vector_float *b, gsl_vector_float *x); +void LUSolve (const gsl_matrix *LU, const gsl_permutation *p, + const gsl_vector *b, gsl_vector *x); +void LUSolve (const gsl_matrix_float *LU, const gsl_permutation *p, + const gsl_vector_float *b, gsl_vector_float *x); bool lapack_ddot(vector<double> &x, vector<double> &y, double &v); bool lapack_sdot(vector<float> &x, vector<float> &y, double &v); + #endif diff --git a/src/param.cpp b/src/param.cpp index 4b8c3a4..63bf349 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -1,6 +1,6 @@ /* Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <iostream> @@ -33,29 +33,20 @@ #include "eigenlib.h" #include "mathfunc.h" - -#ifdef FORCE_FLOAT -#include "param_float.h" -#include "io_float.h" -#else #include "param.h" #include "io.h" -#endif using namespace std; - - - - PARAM::PARAM(void): mode_silence (false), a_mode (0), k_mode(1), d_pace (100000), file_out("result"), path_out("./output/"), miss_level(0.05), maf_level(0.01), hwe_level(0), r2_level(0.9999), -l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001),nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0), +l_min(1e-5), l_max(1e5), n_region(10),p_nr(0.001),em_prec(0.0001), +nr_prec(0.0001),em_iter(10000),nr_iter(100),crt(0), pheno_mean(0), noconstrain (false), -h_min(-1), h_max(-1), h_scale(-1), -rho_min(0.0), rho_max(1.0), rho_scale(-1), +h_min(-1), h_max(-1), h_scale(-1), +rho_min(0.0), rho_max(1.0), rho_scale(-1), logp_min(0.0), logp_max(0.0), logp_scale(-1), h_ngrid(10), rho_ngrid(10), s_min(0), s_max(300), @@ -68,31 +59,22 @@ randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200), error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0), -time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) +time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0), +time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {} - -//read files -//obtain ns_total, ng_total, ns_test, ni_test -void PARAM::ReadFiles (void) -{ +// Read files: obtain ns_total, ng_total, ns_test, ni_test. +void PARAM::ReadFiles (void) { string file_str; - /* - //read continuous cat file - if (!file_mcatc.empty()) { - if (ReadFile_mcatc (file_mcatc, mapRS2catc, n_cat)==false) {error=true;} - } else if (!file_catc.empty()) { - if (ReadFile_catc (file_catc, mapRS2catc, n_cat)==false) {error=true;} - } - */ - //read cat file + + // Read cat file. if (!file_mcat.empty()) { if (ReadFile_mcat (file_mcat, mapRS2cat, n_vc)==false) {error=true;} } else if (!file_cat.empty()) { if (ReadFile_cat (file_cat, mapRS2cat, n_vc)==false) {error=true;} } - //read snp weight files + // Read snp weight files. if (!file_wcat.empty()) { if (ReadFile_wsnp (file_wcat, n_vc, mapRS2wcat)==false) {error=true;} } @@ -100,45 +82,60 @@ void PARAM::ReadFiles (void) if (ReadFile_wsnp (file_wsnp, mapRS2wsnp)==false) {error=true;} } - //count number of kinship files + // Count number of kinship files. if (!file_mk.empty()) { if (CountFileLines (file_mk, n_vc)==false) {error=true;} } - //read snp set + // Read SNP set. if (!file_snps.empty()) { if (ReadFile_snps (file_snps, setSnps)==false) {error=true;} } else { setSnps.clear(); } - //for prediction + // For prediction. if (!file_epm.empty()) { - if (ReadFile_est (file_epm, est_column, mapRS2est)==false) {error=true;} - + if (ReadFile_est (file_epm, est_column, mapRS2est)==false) { + error=true; + } if (!file_bfile.empty()) { file_str=file_bfile+".bim"; - if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - + if (ReadFile_bim (file_str, snpInfo)==false) { + error=true; + } file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;} + if (ReadFile_fam (file_str, indicator_pheno, pheno, + mapID2num, p_column)==false) { + error=true; + } } if (!file_geno.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + if (ReadFile_pheno (file_pheno, indicator_pheno, + pheno, p_column)==false) { + error=true; + } - if (CountFileLines (file_geno, ns_total)==false) {error=true;} + if (CountFileLines (file_geno, ns_total)==false) { + error=true; + } } if (!file_ebv.empty() ) { - if (ReadFile_column (file_ebv, indicator_bv, vec_bv, 1)==false) {error=true;} + if (ReadFile_column (file_ebv, indicator_bv, + vec_bv, 1)==false) { + error=true; + } } if (!file_log.empty() ) { - if (ReadFile_log (file_log, pheno_mean)==false) {error=true;} + if (ReadFile_log (file_log, pheno_mean)==false) { + error=true; + } } - //convert indicator_pheno to indicator_idv + // Convert indicator_pheno to indicator_idv. int k=1; for (size_t i=0; i<indicator_pheno.size(); i++) { k=1; @@ -153,10 +150,12 @@ void PARAM::ReadFiles (void) return; } - //read covariates before the genotype files + // Read covariates before the genotype files. if (!file_cvt.empty() ) { - if (ReadFile_cvt (file_cvt, indicator_cvt, cvt, n_cvt)==false) {error=true;} - + if (ReadFile_cvt (file_cvt, indicator_cvt, + cvt, n_cvt)==false) { + error=true; + } if ((indicator_cvt).size()==0) { n_cvt=1; } @@ -165,102 +164,135 @@ void PARAM::ReadFiles (void) } if (!file_gxe.empty() ) { - if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) {error=true;} + if (ReadFile_column (file_gxe, indicator_gxe, gxe, 1)==false) { + error=true; + } } if (!file_weight.empty() ) { - if (ReadFile_column (file_weight, indicator_weight, weight, 1)==false) {error=true;} + if (ReadFile_column (file_weight, indicator_weight, + weight, 1)==false) { + error=true; + } } - // WJA added - //read genotype and phenotype file for bgen format + // WJA added. + // Read genotype and phenotype file for bgen format. if (!file_oxford.empty()) { file_str=file_oxford+".sample"; - if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,indicator_cvt, cvt, n_cvt)==false) {error=true;} + if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column, + indicator_cvt, cvt, n_cvt)==false) { + error=true; + } if ((indicator_cvt).size()==0) { n_cvt=1; } - // n_cvt=1; - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. ProcessCvtPhen(); - //obtain covariate matrix + // Obtain covariate matrix. gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); CopyCvt (W); file_str=file_oxford+".bgen"; - if (ReadFile_bgen (file_str, setSnps, W, indicator_idv, indicator_snp, snpInfo, maf_level, miss_level, hwe_level, r2_level, ns_test)==false) {error=true;} + if (ReadFile_bgen (file_str, setSnps, W, indicator_idv, + indicator_snp, snpInfo, maf_level, + miss_level, hwe_level, r2_level, + ns_test)==false) { + error=true; + } gsl_matrix_free(W); ns_total=indicator_snp.size(); } - //read genotype and phenotype file for plink format + // Read genotype and phenotype file for PLINK format. if (!file_bfile.empty()) { file_str=file_bfile+".bim"; snpInfo.clear(); if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} - //if both fam file and pheno files are used, use phenotypes inside the pheno file + // If both fam file and pheno files are used, use + // phenotypes inside the pheno file. if (!file_pheno.empty()) { - //phenotype file before genotype file - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + + // Phenotype file before genotype file. + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, + p_column)==false) {error=true;} } else { file_str=file_bfile+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;} + if (ReadFile_fam (file_str, indicator_pheno, pheno, + mapID2num, p_column)==false) {error=true;} } - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. ProcessCvtPhen(); - //obtain covariate matrix + // Obtain covariate matrix. gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); CopyCvt (W); file_str=file_bfile+".bed"; - if (ReadFile_bed (file_str, setSnps, W, indicator_idv, indicator_snp, snpInfo, maf_level, miss_level, hwe_level, r2_level, ns_test)==false) {error=true;} - + if (ReadFile_bed (file_str, setSnps, W, indicator_idv, + indicator_snp, snpInfo, maf_level, + miss_level, hwe_level, r2_level, + ns_test) == false) { + error=true; + } gsl_matrix_free(W); - ns_total=indicator_snp.size(); } - //read genotype and phenotype file for bimbam format + // Read genotype and phenotype file for BIMBAM format. if (!file_geno.empty()) { - //annotation file before genotype file + + // Annotation file before genotype file. if (!file_anno.empty() ) { - if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, mapRS2cM)==false) {error=true;} + if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, + mapRS2cM)==false) { + error=true; + } } - //phenotype file before genotype file - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + // Phenotype file before genotype file. + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, + p_column) == false) { + error=true; + } - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. ProcessCvtPhen(); - //obtain covariate matrix + // Obtain covariate matrix. gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); CopyCvt (W); - if (ReadFile_geno (file_geno, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test)==false) {error=true;} - + if (ReadFile_geno (file_geno, setSnps, W, indicator_idv, + indicator_snp, maf_level, miss_level, + hwe_level, r2_level, mapRS2chr, mapRS2bp, + mapRS2cM, snpInfo, ns_test)==false) { + error=true; + } gsl_matrix_free(W); - ns_total=indicator_snp.size(); } - //read genotype file for multiple plink files + // Read genotype file for multiple PLINK files. if (!file_mbfile.empty()) { igzstream infile (file_mbfile.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open mbfile file: "<<file_mbfile<<endl; return;} + if (!infile) { + cout<<"error! fail to open mbfile file: " << file_mbfile<<endl; + return; + } string file_name; - size_t t=0, ns_test_tmp=0; - gsl_matrix *W; while (!safeGetline(infile, file_name).eof()) { file_str=file_name+".bim"; @@ -268,25 +300,40 @@ void PARAM::ReadFiles (void) if (ReadFile_bim (file_str, snpInfo)==false) {error=true;} if (t==0) { - //if both fam file and pheno files are used, use phenotypes inside the pheno file + + // If both fam file and pheno files are used, use + // phenotypes inside the pheno file. if (!file_pheno.empty()) { - //phenotype file before genotype file - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + + // Phenotype file before genotype file. + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, + p_column)==false) { + error=true; + } } else { file_str=file_name+".fam"; - if (ReadFile_fam (file_str, indicator_pheno, pheno, mapID2num, p_column)==false) {error=true;} + if (ReadFile_fam (file_str, indicator_pheno, pheno, + mapID2num, p_column)==false) { + error=true; + } } - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. ProcessCvtPhen(); - //obtain covariate matrix + // Obtain covariate matrix. W=gsl_matrix_alloc (ni_test, n_cvt); CopyCvt (W); } file_str=file_name+".bed"; - if (ReadFile_bed (file_str, setSnps, W, indicator_idv, indicator_snp, snpInfo, maf_level, miss_level, hwe_level, r2_level, ns_test_tmp)==false) {error=true;} + if (ReadFile_bed (file_str, setSnps, W, indicator_idv, + indicator_snp, snpInfo, maf_level, + miss_level, hwe_level, r2_level, + ns_test_tmp)==false) { + error=true; + } mindicator_snp.push_back(indicator_snp); msnpInfo.push_back(snpInfo); ns_test+=ns_test_tmp; @@ -303,30 +350,46 @@ void PARAM::ReadFiles (void) - //read genotype and phenotype file for multiple bimbam files + // Read genotype and phenotype file for multiple BIMBAM files. if (!file_mgeno.empty()) { - //annotation file before genotype file + + // Annotation file before genotype file. if (!file_anno.empty() ) { - if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, mapRS2cM)==false) {error=true;} + if (ReadFile_anno (file_anno, mapRS2chr, mapRS2bp, + mapRS2cM)==false) { + error=true; + } } - //phenotype file before genotype file - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + // Phenotype file before genotype file. + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, + p_column)==false) { + error=true; + } - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain ni_test, + // save all useful covariates. ProcessCvtPhen(); - //obtain covariate matrix + // Obtain covariate matrix. gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); CopyCvt (W); igzstream infile (file_mgeno.c_str(), igzstream::in); - if (!infile) {cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl; return;} + if (!infile) { + cout<<"error! fail to open mgeno file: "<<file_mgeno<<endl; + return; + } string file_name; size_t ns_test_tmp; while (!safeGetline(infile, file_name).eof()) { - if (ReadFile_geno (file_name, setSnps, W, indicator_idv, indicator_snp, maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp)==false) {error=true;} + if (ReadFile_geno (file_name, setSnps, W, indicator_idv, + indicator_snp, maf_level, miss_level, + hwe_level, r2_level, mapRS2chr, mapRS2bp, + mapRS2cM, snpInfo, ns_test_tmp)==false) { + error=true; + } mindicator_snp.push_back(indicator_snp); msnpInfo.push_back(snpInfo); @@ -343,9 +406,10 @@ void PARAM::ReadFiles (void) if (!file_gene.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, + p_column)==false) {error=true;} - //convert indicator_pheno to indicator_idv + // Convert indicator_pheno to indicator_idv. int k=1; for (size_t i=0; i<indicator_pheno.size(); i++) { k=1; @@ -355,57 +419,69 @@ void PARAM::ReadFiles (void) indicator_idv.push_back(k); } - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. ProcessCvtPhen(); - //obtain covariate matrix + // Obtain covariate matrix. gsl_matrix *W=gsl_matrix_alloc (ni_test, n_cvt); CopyCvt (W); - if (ReadFile_gene (file_gene, vec_read, snpInfo, ng_total)==false) {error=true;} + if (ReadFile_gene (file_gene, vec_read, snpInfo, + ng_total)==false) { + error=true; + } } - //read is after gene file + // Read is after gene file. if (!file_read.empty() ) { - if (ReadFile_column (file_read, indicator_read, vec_read, 1)==false) {error=true;} + if (ReadFile_column (file_read, indicator_read, + vec_read, 1)==false) { + error=true; + } ni_test=0; - for (vector<int>::size_type i=0; i<(indicator_idv).size(); ++i) { + for (vector<int>::size_type i=0; + i<(indicator_idv).size(); + ++i) { indicator_idv[i]*=indicator_read[i]; ni_test+=indicator_idv[i]; } if (ni_test==0) { - error=true; - cout<<"error! number of analyzed individuals equals 0. "<<endl; - return; + error=true; + cout<<"error! number of analyzed individuals equals 0. "<< + endl; + return; } } - //for ridge prediction, read phenotype only + // For ridge prediction, read phenotype only. if (file_geno.empty() && file_gene.empty() && !file_pheno.empty()) { - if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, p_column)==false) {error=true;} + if (ReadFile_pheno (file_pheno, indicator_pheno, pheno, + p_column)==false) { + error=true; + } - //post-process covariates and phenotypes, obtain ni_test, save all useful covariates + // Post-process covariates and phenotypes, obtain + // ni_test, save all useful covariates. ProcessCvtPhen(); } - return; } - - - - - void PARAM::CheckParam (void) { struct stat fileInfo; string str; - //check parameters - if (k_mode!=1 && k_mode!=2) {cout<<"error! unknown kinship/relatedness input mode: "<<k_mode<<endl; error=true;} + // Check parameters. + if (k_mode!=1 && k_mode!=2) { + cout<<"error! unknown kinship/relatedness input mode: "<< + k_mode<<endl; + error=true; + } if (a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=5 && a_mode!=11 && a_mode!=12 && a_mode!=13 && a_mode!=14 && a_mode!=15 && a_mode!=21 && a_mode!=22 && a_mode!=25 && a_mode!=26 && a_mode!=27 && a_mode!=28 && a_mode!=31 && a_mode!=41 && a_mode!=42 && a_mode!=43 && a_mode!=51 && a_mode!=52 && a_mode!=53 && a_mode!=54 && a_mode!=61 && a_mode!=62 && a_mode!=63 && a_mode!=66 && a_mode!=67 && a_mode!=71) {cout<<"error! unknown analysis mode: "<<a_mode<<". make sure -gk or -eigen or -lmm or -bslmm -predict or -calccov is sepcified correctly."<<endl; error=true;} if (miss_level>1) {cout<<"error! missing level needs to be between 0 and 1. current value = "<<miss_level<<endl; error=true;} @@ -447,13 +523,10 @@ void PARAM::CheckParam (void) } } - //sort (p_column.begin(), p_column.end() ); n_ph=p_column.size(); - - - //only lmm option (and one prediction option) can deal with multiple phenotypes - //and no gene expression files + // Only LMM option (and one prediction option) can deal with + // multiple phenotypes and no gene expression files. if (n_ph>1 && a_mode!=1 && a_mode!=2 && a_mode!=3 && a_mode!=4 && a_mode!=43) { cout<<"error! the current analysis mode "<<a_mode<<" can not deal with multiple phenotypes."<<endl; error=true; } diff --git a/src/param.h b/src/param.h index 72b7d85..18d5e36 100644 --- a/src/param.h +++ b/src/param.h @@ -1,6 +1,6 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011 Xiang Zhou + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017 Xiang Zhou This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,7 +13,7 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __PARAM_H__ @@ -27,8 +27,6 @@ using namespace std; - - class SNPINFO { public: string chr; @@ -40,37 +38,36 @@ public: size_t n_miss; double missingness; double maf; - size_t n_idv;//number of non-missing individuals - size_t n_nb;//number of neighbours on the right hand side - size_t file_position;//snp location on file + size_t n_idv; // Number of non-missing individuals. + size_t n_nb; // Number of neighbours on the right hand side. + size_t file_position; // SNP location in file. }; -//results for lmm +// Results for LMM. class SUMSTAT { public: - double beta; //REML estimator for beta - double se; //SE for beta - double lambda_remle; //REML estimator for lambda - double lambda_mle; //MLE estimator for lambda - double p_wald; //p value from a Wald test - double p_lrt; //p value from a likelihood ratio test - double p_score; //p value from a score test + double beta; // REML estimator for beta. + double se; // SE for beta. + double lambda_remle; // REML estimator for lambda. + double lambda_mle; // MLE estimator for lambda. + double p_wald; // p value from a Wald test. + double p_lrt; // p value from a likelihood ratio test. + double p_score; // p value from a score test. }; -//results for mvlmm +// Results for mvLMM. class MPHSUMSTAT { public: - vector<double> v_beta; //REML estimator for beta - double p_wald; //p value from a Wald test - double p_lrt; //p value from a likelihood ratio test - double p_score; //p value from a score test - vector<double> v_Vg; //estimator for Vg, right half - vector<double> v_Ve; //estimator for Ve, right half - vector<double> v_Vbeta; //estimator for Vbeta, right half + vector<double> v_beta; // REML estimator for beta. + double p_wald; // p value from a Wald test. + double p_lrt; // p value from a likelihood ratio test. + double p_score; // p value from a score test. + vector<double> v_Vg; // Estimator for Vg, right half. + vector<double> v_Ve; // Estimator for Ve, right half. + vector<double> v_Vbeta; // Estimator for Vbeta, right half. }; - -//hyper-parameters for bslmm +// Hyper-parameters for BSLMM. class HYPBSLMM { public: double h; @@ -78,15 +75,11 @@ public: double rho; double pge; double logp; - size_t n_gamma; }; - -//header class -class HEADER -{ - +// Header class. +class HEADER { public: size_t rs_col; size_t chr_col; @@ -108,28 +101,26 @@ public: size_t var_col; size_t ws_col; size_t cor_col; - size_t coln;//number of columns + size_t coln; // Number of columns. set<size_t> catc_col; set<size_t> catd_col; }; - - class PARAM { public: - // IO related parameters + // IO-related parameters. bool mode_silence; - int a_mode; //analysis mode, 1/2/3/4 for Frequentist tests - int k_mode; //kinship read mode: 1: n by n matrix, 2: id/id/k_value; - vector<size_t> p_column; //which phenotype column needs analysis - size_t d_pace; //display pace + int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests + int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; + vector<size_t> p_column; // Which phenotype column needs analysis. + size_t d_pace; // Display pace string file_bfile, file_mbfile; string file_geno, file_mgeno; string file_pheno; - string file_anno; //optional - string file_gxe; //optional - string file_cvt; //optional + string file_anno; // Optional. + string file_gxe; // Optional. + string file_cvt; // Optional. string file_cat, file_mcat; string file_catc, file_mcatc; string file_var; @@ -144,26 +135,23 @@ public: string file_bf, file_hyp; string path_out; - - string file_epm; //estimated parameter file - string file_ebv; //estimated breeding value file - string file_log; //log file containing mean estimate - - string file_read; //file containing total number of reads - string file_gene; //gene expression file - - string file_snps; //file containing analyzed snps or genes -// WJA Added + string file_epm; // Estimated parameter file. + string file_ebv; // Estimated breeding value file. + string file_log; // Log file containing mean estimate. + string file_read; // File containing total number of reads. + string file_gene; // Gene expression file. + string file_snps; // File containing analyzed SNPs or genes. + + // WJA added. string file_oxford; - - // QC related parameters + // QC-related parameters. double miss_level; double maf_level; double hwe_level; double r2_level; - // LMM related parameters + // LMM-related parameters. double l_min; double l_max; size_t n_region; @@ -172,16 +160,18 @@ public: double pve_null, pve_se_null, pve_total, se_pve_total; double vg_remle_null, ve_remle_null, vg_mle_null, ve_mle_null; vector<double> Vg_remle_null, Ve_remle_null, Vg_mle_null, Ve_mle_null; - vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null, VVe_mle_null; - vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null, se_beta_mle_null; + vector<double> VVg_remle_null, VVe_remle_null, VVg_mle_null; + vector<double> VVe_mle_null; + vector<double> beta_remle_null, se_beta_remle_null, beta_mle_null; + vector<double> se_beta_mle_null; double p_nr; double em_prec, nr_prec; size_t em_iter, nr_iter; size_t crt; - double pheno_mean; //phenotype mean from bslmm fitting or for prediction + double pheno_mean; // Phenotype mean from BSLMM fitting or prediction. - //for fitting multiple variance components - //the first three are of size n_vc, and the next two are of size n_vc+1 + // For fitting multiple variance components. + // The first 3 are of size (n_vc), and the next 2 are of size n_vc+1. bool noconstrain; vector<double> v_traceG; vector<double> v_pve; @@ -194,98 +184,141 @@ public: vector<double> v_beta; vector<double> v_se_beta; - // BSLMM MCMC related parameters - double h_min, h_max, h_scale; //priors for h - double rho_min, rho_max, rho_scale; //priors for rho - double logp_min, logp_max, logp_scale; //priors for log(pi) + // BSLMM/MCMC-related parameters. + double h_min, h_max, h_scale; // Priors for h. + double rho_min, rho_max, rho_scale; // Priors for rho. + double logp_min, logp_max, logp_scale; // Priors for log(pi). size_t h_ngrid, rho_ngrid; - size_t s_min, s_max; //minimum and maximum number of gammas - size_t w_step; //number of warm up/burn in iterations - size_t s_step; //number of sampling iterations - size_t r_pace; //record pace - size_t w_pace; //write pace - size_t n_accept; //number of acceptance - size_t n_mh; //number of MH steps within each iteration - double geo_mean; //mean of the geometric distribution + size_t s_min, s_max; // Min & max. number of gammas. + size_t w_step; // # warm up/burn in iter. + size_t s_step; // # sampling iterations. + size_t r_pace; // Record pace. + size_t w_pace; // Write pace. + size_t n_accept; // Number of acceptance. + size_t n_mh; // # MH steps in each iter. + double geo_mean; // Mean of geometric dist. long int randseed; double trace_G; HYPBSLMM cHyp_initial; - //VARCOV related parameters + // VARCOV-related parameters. double window_cm; size_t window_bp; size_t window_ns; - //vc related parameters + // vc-related parameters. size_t n_block; - // Summary statistics + // Summary statistics. bool error; - size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; //number of individuals - size_t np_obs, np_miss; //number of observed and missing phenotypes - size_t ns_total, ns_test, ns_study, ns_ref; //number of snps - size_t ng_total, ng_test; //number of genes - size_t ni_control, ni_case; //number of controls and number of cases - size_t ni_subsample; //number of subsampled individuals - //size_t ni_total_ref, ns_total_ref, ns_pair;//max number of individuals, number of snps and number of snp pairs in the reference panel - size_t n_cvt; //number of covariates - size_t n_cat; //number of continuous categories - size_t n_ph; //number of phenotypes - size_t n_vc; //number of variance components (including the diagonal matrix) - double time_total; //record total time - double time_G; //time spent on reading files the second time and calculate K - double time_eigen; //time spent on eigen-decomposition - double time_UtX; //time spent on calculating UX and Uy - double time_UtZ; //time spent on calculating UtZ, for probit BSLMM - double time_opt; //time spent on optimization iterations/or mcmc - double time_Omega; //time spent on calculating Omega - double time_hyp; //time spent on sampling hyper-parameters, in PMM - double time_Proposal; //time spend on constructing the proposal distribution (i.e. the initial lmm or lm analysis) - - // Data - vector<vector<double> > pheno; //a vector record all phenotypes, NA replaced with -9 - vector<vector<double> > cvt; //a vector record all covariates, NA replaced with -9 - vector<double> gxe; //a vector record all covariates, NA replaced with -9 - vector<double> weight; //a vector record weights for the individuals, which is useful for animal breeding studies - vector<vector<int> > indicator_pheno; //a matrix record when a phenotype is missing for an individual; 0 missing, 1 available - vector<int> indicator_idv; //indicator for individuals (phenotypes), 0 missing, 1 available for analysis - vector<int> indicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis - vector< vector<int> > mindicator_snp; //sequence indicator for SNPs: 0 ignored because of (a) maf, (b) miss, (c) non-poly; 1 available for analysis - vector<int> indicator_cvt; //indicator for covariates, 0 missing, 1 available for analysis - vector<int> indicator_gxe; //indicator for gxe, 0 missing, 1 available for analysis - vector<int> indicator_weight; //indicator for weight, 0 missing, 1 available for analysis - - vector<int> indicator_bv; //indicator for estimated breeding value file, 0 missing, 1 available for analysis - vector<int> indicator_read; //indicator for read file, 0 missing, 1 available for analysis - vector<double> vec_read; //total number of reads - vector<double> vec_bv; //breeding values + + // Number of individuals. + size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref; + + // Number of observed and missing phenotypes. + size_t np_obs, np_miss; + + // Number of SNPs. + size_t ns_total, ns_test, ns_study, ns_ref; + + size_t ng_total, ng_test; // Number of genes. + size_t ni_control, ni_case; // Number of controls and number of cases. + size_t ni_subsample; // Number of subsampled individuals. + size_t n_cvt; // Number of covariates. + size_t n_cat; // Number of continuous categories. + size_t n_ph; // Number of phenotypes. + size_t n_vc; // Number of variance components + // (including the diagonal matrix). + double time_total; // Record total time. + double time_G; // Time spent on reading files the + // second time and calculate K. + double time_eigen; // Time spent on eigen-decomposition. + double time_UtX; // Time spent on calculating UX and Uy. + double time_UtZ; // Time calculating UtZ for probit BSLMM. + double time_opt; // Time on optimization iterations/MCMC. + double time_Omega; // Time spent on calculating Omega. + double time_hyp; // Time sampling hyperparameters in PMM. + double time_Proposal; // Time spent on constructing the + // proposal distribution (i.e. the + // initial LMM or LM analysis). + + // Data. + // Vector recording all phenotypes (NA replaced with -9). + vector<vector<double> > pheno; + + // Vector recording all covariates (NA replaced with -9). + vector<vector<double> > cvt; + + // Vector recording all covariates (NA replaced with -9). + vector<double> gxe; + + // Vector recording weights for the individuals, which is + // useful for animal breeding studies. + vector<double> weight; + + // Matrix recording when a phenotype is missing for an + // individual; 0 missing, 1 available. + vector<vector<int> > indicator_pheno; + + // Indicator for individuals (phenotypes): 0 missing, 1 + // available for analysis + vector<int> indicator_idv; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector<int> indicator_snp; + + // Sequence indicator for SNPs: 0 ignored because of (a) maf, + // (b) miss, (c) non-poly; 1 available for analysis. + vector< vector<int> > mindicator_snp; + + // Indicator for covariates: 0 missing, 1 available for + // analysis. + vector<int> indicator_cvt; + + // Indicator for gxe: 0 missing, 1 available for analysis. + vector<int> indicator_gxe; + + // Indicator for weight: 0 missing, 1 available for analysis. + vector<int> indicator_weight; + + // Indicator for estimated breeding value file: 0 missing, 1 + // available for analysis. + vector<int> indicator_bv; + + // Indicator for read file: 0 missing, 1 available for analysis. + vector<int> indicator_read; + vector<double> vec_read; // Total number of reads. + vector<double> vec_bv; // Breeding values. vector<size_t> est_column; - map<string, int> mapID2num; //map small ID number to number, from 0 to n-1 - map<string, string> mapRS2chr; //map rs# to chromosome location - map<string, long int> mapRS2bp; //map rs# to base position - map<string, double> mapRS2cM; //map rs# to cM - map<string, double> mapRS2est; //map rs# to parameters - map<string, size_t> mapRS2cat; //map rs# to category number - map<string, vector<double> > mapRS2catc; //map rs# to continuous categories - map<string, double> mapRS2wsnp; //map rs# to snp weights - map<string, vector<double> > mapRS2wcat; //map rs# to snp cat weights - - vector<SNPINFO> snpInfo; //record SNP information - vector< vector<SNPINFO> > msnpInfo; //record SNP information - set<string> setSnps; //a set of snps for analysis - - //constructor + map<string, int> mapID2num; // Map small ID to number, 0 to n-1. + map<string, string> mapRS2chr; // Map rs# to chromosome location. + map<string, long int> mapRS2bp; // Map rs# to base position. + map<string, double> mapRS2cM; // Map rs# to cM. + map<string, double> mapRS2est; // Map rs# to parameters. + map<string, size_t> mapRS2cat; // Map rs# to category number. + map<string, vector<double> > mapRS2catc; // Map rs# to cont. cat's. + map<string, double> mapRS2wsnp; // Map rs# to SNP weights. + map<string, vector<double> > mapRS2wcat; // Map rs# to SNP cat weights. + + vector<SNPINFO> snpInfo; // Record SNP information. + vector< vector<SNPINFO> > msnpInfo; // Record SNP information. + set<string> setSnps; // Set of snps for analysis. + + // Constructor. PARAM(); - //functions + // Functions. void ReadFiles (); void CheckParam (); void CheckData (); void PrintSummary (); - void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, const bool calc_K); - void ReadGenotypes (vector<vector<unsigned char> > &Xt, gsl_matrix *K, const bool calc_K); + void ReadGenotypes (gsl_matrix *UtX, gsl_matrix *K, + const bool calc_K); + void ReadGenotypes (vector<vector<unsigned char> > &Xt, + gsl_matrix *K, const bool calc_K); void CheckCvt (); void CopyCvt (gsl_matrix *W); void CopyA (size_t flag, gsl_matrix *A); @@ -295,15 +328,27 @@ public: void CopyCvtPhen (gsl_matrix *W, gsl_vector *y, size_t flag); void CopyCvtPhen (gsl_matrix *W, gsl_matrix *Y, size_t flag); void CalcKin (gsl_matrix *matrix_kin); - void CalcS (const map<string, double> &mapRS2wA, const map<string, double> &mapRS2wK, const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K, gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns); - void WriteVector (const gsl_vector *q, const gsl_vector *s, const size_t n_total, const string suffix); + void CalcS (const map<string, double> &mapRS2wA, + const map<string, double> &mapRS2wK, + const gsl_matrix *W, gsl_matrix *A, gsl_matrix *K, + gsl_matrix *S, gsl_matrix *Svar, gsl_vector *ns); + void WriteVector (const gsl_vector *q, const gsl_vector *s, + const size_t n_total, const string suffix); void WriteVar (const string suffix); void WriteMatrix (const gsl_matrix *matrix_U, const string suffix); void WriteVector (const gsl_vector *vector_D, const string suffix); void CopyRead (gsl_vector *log_N); - void ObtainWeight (const set<string> &setSnps_beta, map<string, double> &mapRS2wK); - void UpdateWeight (const size_t pve_flag, const map<string, double> &mapRS2wK, const size_t ni_test, const gsl_vector *ns, map<string, double> &mapRS2wA); - void UpdateSNPnZ (const map<string, double> &mapRS2wA, const map<string, string> &mapRS2A1, const map<string, double> &mapRS2z, gsl_vector *w, gsl_vector *z, vector<size_t> &vec_cat); + void ObtainWeight (const set<string> &setSnps_beta, map<string, + double> &mapRS2wK); + void UpdateWeight (const size_t pve_flag, + const map<string,double> &mapRS2wK, + const size_t ni_test, const gsl_vector *ns, + map<string, double> &mapRS2wA); + void UpdateSNPnZ (const map<string, double> &mapRS2wA, + const map<string, string> &mapRS2A1, + const map<string, double> &mapRS2z, + gsl_vector *w, gsl_vector *z, + vector<size_t> &vec_cat); void UpdateSNP (const map<string, double> &mapRS2wA); }; |