diff options
-rw-r--r-- | src/debug.cpp | 27 | ||||
-rw-r--r-- | src/debug.h | 15 | ||||
-rw-r--r-- | src/gemma.cpp | 66 | ||||
-rw-r--r-- | src/lapack.cpp | 60 | ||||
-rw-r--r-- | src/lapack.h | 2 | ||||
-rw-r--r-- | src/mathfunc.cpp | 57 | ||||
-rw-r--r-- | src/mathfunc.h | 9 | ||||
-rw-r--r-- | src/mvlmm.cpp | 9 | ||||
-rw-r--r-- | src/param.h | 3 | ||||
-rw-r--r-- | test/src/unittests-math.cpp | 8 | ||||
-rwxr-xr-x | test/test_suite.sh | 67 |
11 files changed, 185 insertions, 138 deletions
diff --git a/src/debug.cpp b/src/debug.cpp index 4e37538..da0d06f 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -19,12 +19,27 @@ #include "mathfunc.h" // Helper function called by macro validate_K(K, check) -void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line) { +void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line) { if (do_check) { - debug_msg("Validating K"); - if (!checkMatrixEigen(K)) warning_at_msg(__file,__line,"K has small or negative eigenvalues!"); - if (!isMatrixIllConditioned(K)) warning_at_msg(__file,__line,"K is ill conditioned!") if (!isMatrixSymmetric(K)) fail_at_msg(__file,__line,"K is not symmetric!" ); - if (!isMatrixPositiveDefinite(K)) fail_at_msg(__file,__line,"K is not positive definite!"); -; + // debug_msg("Validating K"); + auto eigenvalues = getEigenValues(K); + uint count_small; + if (count_small = count_small_values(eigenvalues,EIGEN_MINVALUE)>1) { + std::string msg = "K has "; + msg += std::to_string(count_small); + msg += " eigenvalues close to zero"; + warning_at_msg(__file,__line,msg); + } + if (!isMatrixIllConditioned(eigenvalues)) + warning_at_msg(__file,__line,"K is ill conditioned!"); + if (!isMatrixSymmetric(K)) + fail_at_msg(strict,__file,__line,"K is not symmetric!" ); + bool negative_values; + if (negative_values = has_negative_values_but_one(eigenvalues)) { + warning_at_msg(__file,__line,"K has more than one negative eigenvalues!"); + } + if (count_small>=0 && negative_values && !isMatrixPositiveDefinite(K)) + fail_at_msg(strict,__file,__line,"K is not positive definite!"); + gsl_vector_free(eigenvalues); } } diff --git a/src/debug.h b/src/debug.h index 82dd245..f47cb4c 100644 --- a/src/debug.h +++ b/src/debug.h @@ -12,16 +12,21 @@ void gemma_gsl_error_handler (const char * reason, // Validation routines -void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line); +void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line); #define ROUND(f) round(f * 10000.)/10000 -#define validate_K(K,check) do_validate_K(K,check,__FILE__,__LINE__) +#define validate_K(K,check,strict) do_validate_K(K,check,strict,__FILE__,__LINE__) #define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl; -inline void fail_at_msg(const char *__file, int __line, const char *msg) { - std::cerr << "**** FAIL: " << msg << " in " << __file << " at line " << __line << std::endl; - exit(1); +inline void fail_at_msg(bool strict, const char *__file, int __line, const char *msg) { + if (strict) + std::cerr << "**** STRICT FAIL: "; + else + std::cerr << "**** WARNING: "; + std::cerr << msg << " in " << __file << " at line " << __line << std::endl; + if (strict) + exit(1); } #if defined NDEBUG diff --git a/src/gemma.cpp b/src/gemma.cpp index 4c22329..d3401a6 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -708,7 +708,8 @@ void GEMMA::PrintHelp(size_t option) { if (option == 14) { cout << " DEBUG OPTIONS" << endl; - cout << " -no-checks disable checks" << endl; + cout << " -no-check disable checks" << endl; + cout << " -strict strict mode will stop when there is a problem" << endl; cout << " -silence silent terminal display" << endl; cout << " -debug debug output" << endl; cout << " -nind [num] read up to num individuals" << endl; @@ -1593,8 +1594,10 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { cPar.window_ns = atoi(str.c_str()); } else if (strcmp(argv[i], "-debug") == 0) { cPar.mode_debug = true; - } else if (strcmp(argv[i], "-no-checks") == 0) { + } else if (strcmp(argv[i], "-no-check") == 0) { cPar.mode_check = false; + } else if (strcmp(argv[i], "-strict") == 0) { + cPar.mode_strict = true; } else { cout << "error! unrecognized option: " << argv[i] << endl; cPar.error = true; @@ -1730,7 +1733,7 @@ void GEMMA::BatchRun(PARAM &cPar) { cout << "error! fail to read kinship/relatedness file. " << endl; return; } - // FIXME: this is strange, why read twice? + // This is not so elegant. Reads twice to select on idv and then cvt ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full); if (cPar.error == true) { @@ -1741,20 +1744,12 @@ void GEMMA::BatchRun(PARAM &cPar) { // center matrix G CenterMatrix(G); CenterMatrix(G_full); - validate_K(G,cPar.mode_check); + validate_K(G,cPar.mode_check,cPar.mode_strict); // eigen-decomposition and calculate trace_G cout << "Start Eigen-Decomposition..." << endl; time_start = clock(); - cPar.trace_G = EigenDecomp(G, U, eval, 0); - cPar.trace_G = 0.0; - for (size_t i = 0; i < eval->size; i++) { - if (gsl_vector_get(eval, i) < 1e-10) { - gsl_vector_set(eval, i, 0); - } - cPar.trace_G += gsl_vector_get(eval, i); - } - cPar.trace_G /= (double)eval->size; + cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // calculate UtW and Uty @@ -1889,7 +1884,7 @@ void GEMMA::BatchRun(PARAM &cPar) { } // Now we have the Kinship matrix test it - validate_K(G,cPar.mode_check); + validate_K(G,cPar.mode_check,cPar.mode_strict); if (cPar.a_mode == 21) { cPar.WriteMatrix(G, "cXX"); @@ -2352,7 +2347,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // center matrix G CenterMatrix(G); - validate_K(G,cPar.mode_check); + validate_K(G,cPar.mode_check,cPar.mode_strict); (cPar.v_traceG).clear(); double d = 0; @@ -2721,7 +2716,7 @@ return;} // center matrix G CenterMatrix(G); - validate_K(G,cPar.mode_check); + validate_K(G,cPar.mode_check,cPar.mode_strict); // is residual weights are provided, then if (!cPar.file_weight.empty()) { @@ -2750,9 +2745,9 @@ return;} time_start = clock(); if (cPar.a_mode == 31) { - cPar.trace_G = EigenDecomp(G, U, eval, 1); + cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 1); } else { - cPar.trace_G = EigenDecomp(G, U, eval, 0); + cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); } if (!cPar.file_weight.empty()) { @@ -2769,15 +2764,6 @@ return;} } } - cPar.trace_G = 0.0; - for (size_t i = 0; i < eval->size; i++) { - if (gsl_vector_get(eval, i) < 1e-10) { - gsl_vector_set(eval, i, 0); - } - cPar.trace_G += gsl_vector_get(eval, i); - } - cPar.trace_G /= (double)eval->size; - cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); } else { @@ -3037,7 +3023,7 @@ return;} // center matrix G CenterMatrix(G); - validate_K(G,cPar.mode_check); + validate_K(G,cPar.mode_check,cPar.mode_strict); } else { cPar.ReadGenotypes(UtX, G, true); } @@ -3045,15 +3031,9 @@ return;} // eigen-decomposition and calculate trace_G cout << "Start Eigen-Decomposition..." << endl; time_start = clock(); - cPar.trace_G = EigenDecomp(G, U, eval, 0); - cPar.trace_G = 0.0; - for (size_t i = 0; i < eval->size; i++) { - if (gsl_vector_get(eval, i) < 1e-10) { - gsl_vector_set(eval, i, 0); - } - cPar.trace_G += gsl_vector_get(eval, i); - } - cPar.trace_G /= (double)eval->size; + + cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); + cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); @@ -3154,7 +3134,7 @@ return;} // center matrix G CenterMatrix(G); - validate_K(G,cPar.mode_check); + validate_K(G,cPar.mode_check,cPar.mode_strict); } else { cPar.ReadGenotypes(UtX, G, true); @@ -3163,15 +3143,7 @@ return;} // eigen-decomposition and calculate trace_G cout << "Start Eigen-Decomposition..." << endl; time_start = clock(); - cPar.trace_G = EigenDecomp(G, U, eval, 0); - cPar.trace_G = 0.0; - for (size_t i = 0; i < eval->size; i++) { - if (gsl_vector_get(eval, i) < 1e-10) { - gsl_vector_set(eval, i, 0); - } - cPar.trace_G += gsl_vector_get(eval, i); - } - cPar.trace_G /= (double)eval->size; + cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); diff --git a/src/lapack.cpp b/src/lapack.cpp index 8f6e8ff..5b2fc56 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -23,8 +23,12 @@ #include <iostream> #include <vector> +#include "debug.h" +#include "mathfunc.h" + using namespace std; +/* extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, float *ALPHA, float *A, int *LDA, float *B, int *LDB, float *BETA, float *C, int *LDC); @@ -39,6 +43,7 @@ extern "C" void ssyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, float *A, int *ISUPPZ, float *WORK, int *LWORK, int *IWORK, int *LIWORK, int *INFO); extern "C" double sdot_(int *N, float *DX, int *INCX, float *DY, int *INCY); +*/ extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double *ALPHA, double *A, int *LDA, double *B, int *LDB, @@ -55,6 +60,7 @@ extern "C" void dsyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, double *A, int *LIWORK, int *INFO); extern "C" double ddot_(int *N, double *DX, int *INCX, double *DY, int *INCY); +/* // Cholesky decomposition, A is destroyed. void lapack_float_cholesky_decomp(gsl_matrix_float *A) { int N = A->size1, LDA = A->size1, INFO; @@ -75,6 +81,7 @@ void lapack_float_cholesky_decomp(gsl_matrix_float *A) { return; } +*/ // Cholesky decomposition, A is destroyed. void lapack_cholesky_decomp(gsl_matrix *A) { @@ -97,6 +104,7 @@ void lapack_cholesky_decomp(gsl_matrix *A) { return; } +/* // Cholesky solve, A is decomposed. void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b, gsl_vector_float *x) { @@ -118,6 +126,7 @@ void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b, return; } +*/ // Cholesky solve, A is decomposed. void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) { @@ -140,6 +149,7 @@ void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) { return; } +/* void lapack_sgemm(char *TransA, char *TransB, float alpha, const gsl_matrix_float *A, const gsl_matrix_float *B, float beta, gsl_matrix_float *C) { @@ -192,6 +202,7 @@ void lapack_sgemm(char *TransA, char *TransB, float alpha, gsl_matrix_float_free(C_t); return; } +*/ void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A, const gsl_matrix *B, double beta, gsl_matrix *C) { @@ -246,6 +257,7 @@ void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A, return; } +/* // Eigen value decomposition, matrix A is destroyed, float seems to // have problem with large matrices (in mac). void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval, @@ -328,7 +340,10 @@ void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval, return; } -// Eigenvalue decomposition, matrix A is destroyed. +*/ + +// Eigenvalue decomposition, matrix A is destroyed. Returns eigenvalues in +// 'eval'. Also returns matrix 'evec' (U). void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, const size_t flag_largematrix) { if (flag_largematrix == 1) { @@ -409,21 +424,45 @@ void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, return; } -// DO NOT set eigenvalues to be positive. +// Does NOT set eigenvalues to be positive. G gets destroyed. Returns +// eigen trace and values in U and eval (eigenvalues). double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix) { lapack_eigen_symmv(G, eval, U, flag_largematrix); // Calculate track_G=mean(diag(G)). double d = 0.0; - for (size_t i = 0; i < eval->size; ++i) { + for (size_t i = 0; i < eval->size; ++i) + d += gsl_vector_get(eval, i); + + d /= (double)eval->size; + + return d; +} + +// Same as EigenDecomp but zeroes eigenvalues close to zero. When +// negative eigenvalues remain a warning is issued. +double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, + const size_t flag_largematrix) { + EigenDecomp(G,U,eval,flag_largematrix); + auto d = 0.0; + int count_negative_eigenvalues = 0; + for (size_t i = 0; i < eval->size; i++) { + if (std::abs(gsl_vector_get(eval, i)) < EIGEN_MINVALUE) + gsl_vector_set(eval, i, 0.0); + if (gsl_vector_get(eval,i) < 0.0) + count_negative_eigenvalues += 1; d += gsl_vector_get(eval, i); } d /= (double)eval->size; + if (count_negative_eigenvalues > 0) { + warning_msg("Matrix G has more than one negative eigenvalues!"); + } return d; } +/* // DO NOT set eigen values to be positive. double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U, gsl_vector_float *eval, const size_t flag_largematrix) { @@ -438,6 +477,7 @@ double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U, return d; } +*/ double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) { double logdet_O = 0.0; @@ -452,6 +492,7 @@ double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) { return logdet_O; } +/* double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, gsl_vector_float *OiXty) { double logdet_O = 0.0; @@ -465,6 +506,7 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty, return logdet_O; } +*/ // LU decomposition. void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) { @@ -472,6 +514,7 @@ void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) { return; } +/* void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) { gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); @@ -496,6 +539,7 @@ void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) { gsl_matrix_free(LU_double); return; } +*/ // LU invert. void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, @@ -504,6 +548,7 @@ void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, return; } +/* void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p, gsl_matrix_float *inverse) { gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); @@ -532,6 +577,8 @@ void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p, return; } +*/ + // LU lndet. double LULndet(gsl_matrix *LU) { double d; @@ -539,6 +586,7 @@ double LULndet(gsl_matrix *LU) { return d; } +/* double LULndet(gsl_matrix_float *LU) { gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); double d; @@ -557,6 +605,7 @@ double LULndet(gsl_matrix_float *LU) { gsl_matrix_free(LU_double); return d; } +*/ // LU solve. void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, @@ -565,6 +614,7 @@ void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, return; } +/* void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p, const gsl_vector_float *b, gsl_vector_float *x) { gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2); @@ -600,7 +650,7 @@ void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p, gsl_vector_free(x_double); return; } - +*/ bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) { bool flag = false; int incx = 1; @@ -614,6 +664,7 @@ bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) { return flag; } +/* bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) { bool flag = false; int incx = 1; @@ -626,3 +677,4 @@ bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) { return flag; } +*/ diff --git a/src/lapack.h b/src/lapack.h index ff02b96..9596667 100644 --- a/src/lapack.h +++ b/src/lapack.h @@ -41,6 +41,8 @@ void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec, double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, const size_t flag_largematrix); +double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, + const size_t flag_largematrix); double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U, gsl_vector_float *eval, const size_t flag_largematrix); diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 3b70102..7e10f5a 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -254,59 +254,62 @@ gsl_vector *getEigenValues(const gsl_matrix *G) { // Check whether eigen values are larger than *min* // by default 1E-5. -// Returns success, eigen min, eigen max +// Returns success, eigen min, eigen min-but-1, eigen max -tuple<double, double> abs_minmax(const gsl_vector *v) { - auto min = std::abs(v->data[0]); - auto max = std::abs(v->data[0]); +tuple<double, double, double> abs_minmax(const gsl_vector *v) { + auto min = std::abs(v->data[0]); + auto min1 = std::abs(v->data[0]); + auto max = std::abs(v->data[0]); for (auto i=0; i<v->size; i++) { auto value = std::abs(v->data[i]); - if (value < min) + if (value < min) { + min1 = min; min = value; + } if (value > max) max = value; } - return std::make_tuple(min, max); + return std::make_tuple(min, min1, max); } -bool has_negative_values(const gsl_vector *v) { +// Check for negative values. skip_min will leave out +// the lowest value +bool has_negative_values_but_one(const gsl_vector *v) { + bool one_skipped = false; for (auto i=0; i<v->size; i++) { if (v->data[i] < 0.0) { - return true; + if (one_skipped) + return true; + one_skipped = true; } } return false; } -bool checkMatrixEigen(const gsl_matrix *G, double min) { - auto eigenvalues = getEigenValues(G); - bool ret_valid = true; - - if (has_negative_values(eigenvalues)) - ret_valid = false; - - auto t = abs_minmax(eigenvalues); - auto absmin = get<0>(t); - if (absmin < min) - ret_valid = false; - gsl_vector_free(eigenvalues); - return ret_valid; +uint count_small_values(const gsl_vector *v, double min) { + uint count = 0; + for (auto i=0; i<v->size; i++) { + if (v->data[i] < min) + count += 1; + } + return count; } -bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio) { +// Check for matrix being ill conditioned by taking the eigen values +// and the ratio of max and min but one (min is expected to be zero). +bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) { bool ret_valid = true; - auto eigenvalues = getEigenValues(G); auto t = abs_minmax(eigenvalues); auto absmin = get<0>(t); - auto absmax = get<1>(t); - if (absmax/absmin > max_ratio) { + auto absmin1 = get<1>(t); + auto absmax = get<2>(t); + if (absmax/absmin1 > max_ratio) { #if !NDEBUG - cerr << "**** DEBUG: Eigenvalues Min " << absmin << " Max " << absmax << " Ratio " << absmax/absmin << endl; + cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl; #endif ret_valid = false; } - gsl_vector_free(eigenvalues); return ret_valid; } diff --git a/src/mathfunc.h b/src/mathfunc.h index 8a2ea64..6e20b37 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -23,6 +23,9 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#define CONDITIONED_MAXRATIO 2e+6 // based on http://mathworld.wolfram.com/ConditionNumber.html +#define EIGEN_MINVALUE 1e-10 + using namespace std; using namespace Eigen; @@ -34,10 +37,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w); void CenterMatrix(gsl_matrix *G, const gsl_matrix *W); void StandardizeMatrix(gsl_matrix *G); double ScaleMatrix(gsl_matrix *G); +bool has_negative_values_but_one(const gsl_vector *v); +uint count_small_values(const gsl_vector *v, double min); bool isMatrixPositiveDefinite(const gsl_matrix *G); -bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio=4.0); +bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio=CONDITIONED_MAXRATIO); bool isMatrixSymmetric(const gsl_matrix *G); -bool checkMatrixEigen(const gsl_matrix *G, double min=1e-5); +gsl_vector *getEigenValues(const gsl_matrix *G); double SumVector(const gsl_vector *v); double CenterVector(gsl_vector *y); void CenterVector(gsl_vector *y, const gsl_matrix *W); diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index 358038f..be9fd78 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -257,14 +257,7 @@ double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l, gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda); // Eigen decomposition of Lambda. - EigenDecomp(Lambda, U_l, D_l, 0); - - for (size_t i = 0; i < d_size; i++) { - d = gsl_vector_get(D_l, i); - if (d < 0) { - gsl_vector_set(D_l, i, 0); - } - } + EigenDecomp_Zeroed(Lambda, U_l, D_l, 0); // Calculate UltVeh and UltVehi. gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh); diff --git a/src/param.h b/src/param.h index 249bf02..08b1e10 100644 --- a/src/param.h +++ b/src/param.h @@ -112,7 +112,8 @@ public: class PARAM { public: // IO-related parameters - bool mode_check = true; + bool mode_check = true; // run data checks (slower) + bool mode_strict = false; // exit on some data checks bool mode_silence; bool mode_debug = false; uint issue; // enable tests for issue on github tracker diff --git a/test/src/unittests-math.cpp b/test/src/unittests-math.cpp index 11aadf6..ac4c180 100644 --- a/test/src/unittests-math.cpp +++ b/test/src/unittests-math.cpp @@ -16,21 +16,21 @@ TEST_CASE( "Math functions", "[math]" ) { copy(data, data+9, m->data); REQUIRE( isMatrixPositiveDefinite(m) ); REQUIRE( isMatrixSymmetric(m) ); - REQUIRE( checkMatrixEigen(m,0.001) ); + // REQUIRE( checkMatrixEigen(m,0.001) ); double data1[] = {1.0,0,0, 0,3.0,0, 0,0,2.0}; copy(data1, data1+9, m->data); REQUIRE( isMatrixPositiveDefinite(m) ); - REQUIRE( checkMatrixEigen(m) ); + // REQUIRE( checkMatrixEigen(m) ); double data2[] = {1,1,1, 1,1,1, 1,1,0.5}; copy(data2, data2+9, m->data); REQUIRE( !isMatrixPositiveDefinite(m)); - REQUIRE( !checkMatrixEigen(m) ); + // REQUIRE( !checkMatrixEigen(m) ); double data3[] = {1.0, 0, 0, 3.0,3.0, 0, @@ -38,7 +38,7 @@ TEST_CASE( "Math functions", "[math]" ) { copy(data3, data3+9, m->data); REQUIRE( !isMatrixPositiveDefinite(m) ); REQUIRE( !isMatrixSymmetric(m) ); - REQUIRE( !checkMatrixEigen(m) ); + // REQUIRE( checkMatrixEigen(m) ); // ---- NaN checks vector<double> v = {1.0, 2.0}; diff --git a/test/test_suite.sh b/test/test_suite.sh index 0991c63..44eb14c 100755 --- a/test/test_suite.sh +++ b/test/test_suite.sh @@ -8,9 +8,10 @@ testCenteredRelatednessMatrixKFullLOCO1() { -p ../example/mouse_hs1940.pheno.txt \ -a ../example/mouse_hs1940.anno.txt \ -loco 1 -gk -debug -o $outn - assertEquals 1 $? - # assertEquals "1940" `wc -l < $outfn` - # assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals 0 $? + outfn=output/$outn.cXX.txt + assertEquals "1940" `wc -l < $outfn` + assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testUnivariateLinearMixedModelFullLOCO1() { @@ -24,26 +25,24 @@ testUnivariateLinearMixedModelFullLOCO1() { -lmm \ -debug \ -o $outn - assertEquals 1 $? - # grep "total computation time" < output/$outn.log.txt - # assertEquals 0 $? - # outfn=output/$outn.assoc.txt - # assertEquals "951" `wc -l < $outfn` - # assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals 0 $? + grep "total computation time" < output/$outn.log.txt + assertEquals 0 $? + outfn=output/$outn.assoc.txt + assertEquals "951" `wc -l < $outfn` + assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testCenteredRelatednessMatrixK() { $gemma -g ../example/mouse_hs1940.geno.txt.gz \ -p ../example/mouse_hs1940.pheno.txt \ -gk -o mouse_hs1940 - assertEquals 1 $? - # grep "total computation time" < output/mouse_hs1940.log.txt - # assertEquals 1 $? - # outfn=output/mouse_hs1940.cXX.txt - # assertEquals "1940" `wc -l < $outfn` - # assertEquals "3763600" `wc -w < $outfn` - # assertEquals "0.335" `head -c 5 $outfn` - # assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals 0 $? + outfn=output/mouse_hs1940.cXX.txt + assertEquals "1940" `wc -l < $outfn` + assertEquals "3763600" `wc -w < $outfn` + assertEquals "0.335" `head -c 5 $outfn` + assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testUnivariateLinearMixedModel() { @@ -54,12 +53,12 @@ testUnivariateLinearMixedModel() { -k ./output/mouse_hs1940.cXX.txt \ -lmm \ -o mouse_hs1940_CD8_lmm - assertEquals 1 $? - # grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt - # assertEquals 0 $? - # outfn=output/mouse_hs1940_CD8_lmm.assoc.txt - # assertEquals "118459" `wc -w < $outfn` - # assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals 0 $? + grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt + assertEquals 0 $? + outfn=output/mouse_hs1940_CD8_lmm.assoc.txt + assertEquals "118459" `wc -w < $outfn` + assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testMultivariateLinearMixedModel() { @@ -69,11 +68,11 @@ testMultivariateLinearMixedModel() { -a ../example/mouse_hs1940.anno.txt \ -k ./output/mouse_hs1940.cXX.txt \ -lmm -o mouse_hs1940_CD8MCH_lmm - assertEquals 1 $? + assertEquals 0 $? - # outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt - # assertEquals "139867" `wc -w < $outfn` - # assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt + assertEquals "139867" `wc -w < $outfn` + assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } testPlinkStandardRelatednessMatrixK() { @@ -83,9 +82,9 @@ testPlinkStandardRelatednessMatrixK() { rm -f $outfn $gemma -bfile $datadir/HLC \ -gk 2 -o $testname - assertEquals 1 $? - # assertEquals "427" `wc -l < $outfn` - # assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals 0 $? + assertEquals "427" `wc -l < $outfn` + assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } # Test for https://github.com/genetics-statistics/GEMMA/issues/58 @@ -99,10 +98,10 @@ testPlinkMultivariateLinearMixedModel() { -maf 0.1 \ -c $datadir/HLC_covariates.txt \ -o $testname - assertEquals 1 $? - # outfn=output/$testname.assoc.txt - # assertEquals "223243" `wc -l < $outfn` - # assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` + assertEquals 0 $? + outfn=output/$testname.assoc.txt + assertEquals "223243" `wc -l < $outfn` + assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn` } shunit2=`which shunit2` |