diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/debug.cpp | 28 | ||||
-rw-r--r-- | src/debug.h | 9 | ||||
-rw-r--r-- | src/eigenlib.cpp | 13 | ||||
-rw-r--r-- | src/fastblas.cpp | 212 | ||||
-rw-r--r-- | src/fastblas.h | 29 | ||||
-rw-r--r-- | src/gemma.cpp | 27 | ||||
-rw-r--r-- | src/lmm.cpp | 5 | ||||
-rw-r--r-- | src/mathfunc.cpp | 7 | ||||
-rw-r--r-- | src/mathfunc.h | 5 | ||||
-rw-r--r-- | src/param.h | 2 |
10 files changed, 321 insertions, 16 deletions
diff --git a/src/debug.cpp b/src/debug.cpp index 0d3c9cc..dacb89d 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -18,6 +18,34 @@ #include "debug.h" #include "mathfunc.h" +static bool debug_mode = false; +static bool debug_no_check = false; +static bool debug_strict = false; + +void debug_set_debug_mode(bool setting) { debug_mode = setting; } +void debug_set_no_check_mode(bool setting) {debug_no_check = setting; } +void debug_set_strict_mode(bool setting) { debug_strict = setting; } + +bool is_debug_mode() { return debug_mode; }; +bool is_no_check_mode() { return debug_no_check; }; +bool is_strict_mode() { return debug_strict; }; + +/* + Helper function to make sure gsl allocations do their job because + gsl_matrix_alloc does not initiatize values (behaviour that changed + in GSL2) we introduced a 'strict mode' by initializing the buffer + with NaNs. This happens in STRICT mode without NO-CHECKS + (i.e. -strict option). +*/ +gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols) { + gsl_matrix *m = gsl_matrix_alloc(rows,cols); + enforce_msg(m,"Not enough memory"); // just to be sure when there is no error handler set + if (debug_strict && !debug_no_check) { + gsl_matrix_set_all(m, nan("")); + } + return m; +} + // Helper function called by macro validate_K(K, check) void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line) { if (do_check) { diff --git a/src/debug.h b/src/debug.h index 06ca5cb..0f7b38f 100644 --- a/src/debug.h +++ b/src/debug.h @@ -10,6 +10,15 @@ void gemma_gsl_error_handler (const char * reason, const char * file, int line, int gsl_errno); +void debug_set_debug_mode(bool setting); +void debug_set_no_check_mode(bool setting); +void debug_set_strict_mode(bool setting); + +bool is_debug_mode(); +bool is_no_check_mode(); +bool is_strict_mode(); + +gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols); // Validation routines void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line); diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp index a8c545c..4d6aacc 100644 --- a/src/eigenlib.cpp +++ b/src/eigenlib.cpp @@ -17,16 +17,18 @@ */ #include "Eigen/Dense" -#include "gsl/gsl_linalg.h" +// #include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" -#include "gsl/gsl_vector.h" +// #include "gsl/gsl_vector.h" #include <cmath> #include <iostream> #include <vector> +#include <cblas.h> using namespace std; using namespace Eigen; + // On two different clusters, compare eigen vs lapack/gsl: // // dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl @@ -57,8 +59,6 @@ void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha, C_mat = alpha * A_mat.transpose() * B_mat.transpose() + beta * C_mat; } } - - return; } void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A, @@ -75,15 +75,12 @@ void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A, } else { y_vec = alpha * A_mat.transpose() * x_vec + beta * y_vec; } - - return; } void eigenlib_invert(gsl_matrix *A) { Map<Matrix<double, Dynamic, Dynamic, RowMajor>> A_mat(A->data, A->size1, A->size2); A_mat = A_mat.inverse(); - return; } void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) { @@ -92,7 +89,6 @@ void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) { Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic>> b_vec( b->data, b->size, OuterStride<Dynamic>(b->stride)); A_mat = alpha * b_vec * b_vec.transpose() + A_mat; - return; } void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) { @@ -108,5 +104,4 @@ void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) { abort(); eval_vec = es.eigenvalues(); U_mat = es.eigenvectors(); - return; } diff --git a/src/fastblas.cpp b/src/fastblas.cpp new file mode 100644 index 0000000..38ca326 --- /dev/null +++ b/src/fastblas.cpp @@ -0,0 +1,212 @@ +/* + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright (C) 2011-2017, Xiang Zhou + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "gsl/gsl_matrix.h" +#include <cmath> +#include <iomanip> +#include <vector> +#include <cblas.h> +#include "debug.h" +#include "fastblas.h" +#include "mathfunc.h" + +using namespace std; + +/* + Reasonably fast function to copy data from standard C array into + gsl_matrix. Avoid it for performance critical sections. +*/ +gsl_matrix *fast_copy(gsl_matrix *m, const double *mem) { + auto rows = m->size1; + auto cols = m->size2; + if (is_strict_mode()) { // slower correct version + for (auto r=0; r<rows; r++) { + for (auto c=0; c<cols; c++) { + gsl_matrix_set(m,r,c,mem[r*cols+c]); + } + } + } else { // faster goes by row + for (auto r=0; r<rows; r++) { + auto v = gsl_vector_calloc(cols); + assert(v->size == cols); + assert(v->block->size == cols); + assert(v->stride == 1); + memcpy(v->block->data,&mem[r*cols],cols*sizeof(double)); + gsl_matrix_set_row(m,r,v); + } + } + return m; +} + +/* + Helper function fast_cblas_dgemm runs the local dgemm +*/ +void fast_cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, + OPENBLAS_CONST blasint M, + OPENBLAS_CONST blasint N, + OPENBLAS_CONST blasint K, + OPENBLAS_CONST double alpha, + OPENBLAS_CONST double *A, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *B, + OPENBLAS_CONST blasint ldb, + OPENBLAS_CONST double beta, + double *C, + OPENBLAS_CONST blasint ldc) { +#ifndef NDEBUG + size_t i,j; + if (is_debug_mode()) { + printf (" Top left corner of matrix A: \n"); + for (i=0; i<min(M,6); i++) { + for (j=0; j<min(K,6); j++) { + printf ("%12.0f", A[j+i*K]); + } + printf ("\n"); + } + + printf ("\n Top left corner of matrix B: \n"); + for (i=0; i<min(K,6); i++) { + for (j=0; j<min(N,6); j++) { + printf ("%12.0f", B[j+i*N]); + } + printf ("\n"); + } + + printf ("\n Top left corner of matrix C: \n"); + for (i=0; i<min(M,6); i++) { + for (j=0; j<min(N,6); j++) { + printf ("%12.5G", C[j+i*N]); + } + printf ("\n"); + } + + cout << scientific << setprecision(3) << "* RowMajor " << Order << "\t" ; + cout << "transA " << TransA << "\t" ; + cout << "transB " << TransB << "\t" ; + cout << "m " << M << "\t" ; + cout << "n " << N << "\t" ; + cout << "k " << K << "\n" ; + cout << "* lda " << lda << "\t" ; + cout << "ldb " << ldb << "\t" ; + cout << "ldc " << ldc << "\t" ; + cout << "alpha " << alpha << "\t" ; + cout << "beta " << beta << "\n" ; + cout << "* A03 " << A[3] << "\t" ; + cout << "B03 " << B[3] << "\t" ; + cout << "C03 " << C[3] << "\t" ; + cout << "Asum " << sum(A,M,K) << "\t" ; + cout << "Bsum " << sum(B,K,N) << "\n" ; + cout << "Csum " << sum(C,M,N) << "\n" ; + } +#endif // NDEBUG + + cblas_dgemm(Order,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc); + +#ifndef NDEBUG + if (is_debug_mode()) { + printf (" Top left corner of matrix A (cols=k %i, rows=m %i): \n",K,M); + for (i=0; i<min(M,6); i++) { + for (j=0; j<min(K,6); j++) { + printf ("%12.0f", A[j+i*K]); + } + printf ("\n"); + } + + printf ("\n Top left corner of matrix B: \n"); + for (i=0; i<min(K,6); i++) { + for (j=0; j<min(N,6); j++) { + printf ("%12.0f", B[j+i*N]); + } + printf ("\n"); + } + + printf ("\n Top left corner of matrix C: \n"); + for (i=0; i<min(M,6); i++) { + for (j=0; j<min(N,6); j++) { + printf ("%12.5G", C[j+i*N]); + } + printf ("\n"); + } + } +#endif // NDEBUG +} + +/* + Helper function fast_cblas_dgemm converts a GEMMA layout to cblas_dgemm. +*/ +static void fast_cblas_dgemm(const char *TransA, const char *TransB, const double alpha, + const gsl_matrix *A, const gsl_matrix *B, const double beta, + gsl_matrix *C) { + // C++ is row-major + auto transA = (*TransA == 'N' || *TransA == 'n' ? CblasNoTrans : CblasTrans); + auto transB = (*TransB == 'N' || *TransB == 'n' ? CblasNoTrans : CblasTrans); + // A(m x k) * B(k x n) = C(m x n)) + auto rowsA = A->size1; + auto colsA = A->size2; + blasint M = A->size1; + blasint K = B->size1; + assert(K == colsA); + blasint N = B->size2; + // cout << M << "," << N "," << K << endl; + // Layout = CblasRowMajor: Trans: K , NoTrans M + blasint lda = (transA==CblasNoTrans ? K : M ); + blasint ldb = (transB==CblasNoTrans ? N : K ); + blasint ldc = N; + cout << rowsA << endl; + assert(transA == CblasNoTrans); + assert(transB == CblasNoTrans); + assert(rowsA == 2000); + assert(colsA == 200); + assert(lda == K); + assert(ldb == N); + assert(ldc == N); + assert(A->size2 == A->tda); + assert(B->size2 == B->tda); + assert(C->size2 == C->tda); + + // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, + // m, n, k, alpha, A, k, B, n, beta, C, n); + // m = 2000, k = 200, n = 1000; + assert(M==2000); + assert(K==200); + assert(N==1000); + + auto k = K; + auto m = M; + auto n = N; + + fast_cblas_dgemm(CblasRowMajor, transA, transB, M, N, K, alpha, + /* A */ A->data, + /* lda */ lda, + /* B */ B->data, + /* ldb */ ldb, + /* beta */ beta, + /* C */ C->data, ldc); +} + +/* + Use the fasted/supported way to call BLAS dgemm +*/ + +void fast_dgemm(const char *TransA, const char *TransB, const double alpha, + const gsl_matrix *A, const gsl_matrix *B, const double beta, + gsl_matrix *C) { + fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C); +} diff --git a/src/fastblas.h b/src/fastblas.h new file mode 100644 index 0000000..3c28729 --- /dev/null +++ b/src/fastblas.h @@ -0,0 +1,29 @@ +#ifndef __FASTBLAS_H__ +#define __FASTBLAS_H__ + +#include <assert.h> +#include <iostream> +#include "gsl/gsl_matrix.h" + +gsl_matrix *fast_copy(gsl_matrix *m, const double *mem); + +void fast_cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, + OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, + OPENBLAS_CONST blasint M, + OPENBLAS_CONST blasint N, + OPENBLAS_CONST blasint K, + OPENBLAS_CONST double alpha, + OPENBLAS_CONST double *A, + OPENBLAS_CONST blasint lda, + OPENBLAS_CONST double *B, + OPENBLAS_CONST blasint ldb, + OPENBLAS_CONST double beta, + double *C, + OPENBLAS_CONST blasint ldc); + +void fast_dgemm(const char *TransA, const char *TransB, const double alpha, + const gsl_matrix *A, const gsl_matrix *B, const double beta, + gsl_matrix *C); + +#endif diff --git a/src/gemma.cpp b/src/gemma.cpp index 76ff999..95630c6 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -23,6 +23,17 @@ #include <iostream> #include <string> #include <sys/stat.h> +#ifdef OPENBLAS +#pragma message "Compiling with OPENBLAS" +extern "C" { + // these functions are defined in cblas.h - but if we include that we + // conflicts with other BLAS includes + int openblas_get_num_threads(void); + int openblas_get_parallel(void); + char* openblas_get_config(void); + char* openblas_get_corename(void); +} +#endif #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" @@ -1578,10 +1589,13 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { cPar.window_ns = atoi(str.c_str()); } else if (strcmp(argv[i], "-debug") == 0) { cPar.mode_debug = true; + debug_set_debug_mode(true); } else if (strcmp(argv[i], "-no-check") == 0) { cPar.mode_check = false; + debug_set_no_check_mode(true); } else if (strcmp(argv[i], "-strict") == 0) { cPar.mode_strict = true; + debug_set_strict_mode(true); } else { cout << "error! unrecognized option: " << argv[i] << endl; cPar.error = true; @@ -3081,9 +3095,16 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) { } outfile << "##" << endl; - outfile << "## GEMMA Version = " << version << endl; - outfile << "## GSL Version = " << GSL_VERSION << endl; - outfile << "## Eigen Version = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl; + outfile << "## GEMMA Version = " << version << endl; + outfile << "## GSL Version = " << GSL_VERSION << endl; + outfile << "## Eigen Version = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl; + #ifdef OPENBLAS + outfile << "## OpenBlas = " << openblas_get_config() << " - " << openblas_get_corename() << endl; + + outfile << "## threads = " << openblas_get_num_threads() << endl; + string* pStr = new string[4] { "sequential", "threaded", "openmp" }; + outfile << "## parallel type = " << pStr[openblas_get_parallel()] << endl; + #endif outfile << "##" << endl; outfile << "## Command Line Input = "; diff --git a/src/lmm.cpp b/src/lmm.cpp index a49c8c5..71aa184 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -39,6 +39,7 @@ #include "gsl/gsl_vector.h" #include "eigenlib.h" + #include "gzstream.h" #include "io.h" #include "lapack.h" @@ -1269,6 +1270,7 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, return; } + void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, const gsl_matrix *U, const gsl_vector *eval, const gsl_matrix *UtW, const gsl_vector *Uty, @@ -1358,7 +1360,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, }; const auto num_snps = indicator_snp.size(); - const size_t progress_step = (num_snps/20>d_pace ? num_snps/20 : d_pace); + const size_t progress_step = (num_snps/50>d_pace ? num_snps/50 : d_pace); for (size_t t = 0; t < num_snps; ++t) { if (t % progress_step == 0 || t == (num_snps - 1)) { @@ -1429,6 +1431,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, batch_compute(msize); } batch_compute(c % msize); + ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1); // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl; cout << endl; diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 4203837..fbbc061 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -313,6 +313,13 @@ bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) { return ret_valid; } +double sum(const double *m, size_t rows, size_t cols) { + double sum = 0.0; + for (auto i = 0; i<rows*cols; i++) + sum += m[i]; + return sum; +} + double SumVector(const gsl_vector *v) { double sum = 0; for (int i = 0; i < v->size; i++ ) { diff --git a/src/mathfunc.h b/src/mathfunc.h index 6e20b37..804bc20 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -27,7 +27,7 @@ #define EIGEN_MINVALUE 1e-10 using namespace std; -using namespace Eigen; + bool has_nan(const vector<double> v); @@ -43,6 +43,7 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G); bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio=CONDITIONED_MAXRATIO); bool isMatrixSymmetric(const gsl_matrix *G); gsl_vector *getEigenValues(const gsl_matrix *G); +double sum(const double *m, size_t rows, size_t cols); double SumVector(const gsl_vector *v); double CenterVector(gsl_vector *y); void CenterVector(gsl_vector *y, const gsl_matrix *W); @@ -57,6 +58,6 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H); double UcharToDouble02(const unsigned char c); unsigned char Double02ToUchar(const double dosage); void uchar_matrix_get_row(const vector<vector<unsigned char>> &X, - const size_t i_row, VectorXd &x_row); + const size_t i_row, Eigen::VectorXd &x_row); #endif diff --git a/src/param.h b/src/param.h index efea99a..4b473c0 100644 --- a/src/param.h +++ b/src/param.h @@ -26,7 +26,7 @@ #include <set> #include <vector> -#define K_BATCH_SIZE 1000 // #snps used for batched K +#define K_BATCH_SIZE 10000 // #snps used for batched K #define DEFAULT_PACE 1000 using namespace std; |