diff options
author | Pjotr Prins | 2018-09-06 10:25:22 +0000 |
---|---|---|
committer | Pjotr Prins | 2018-09-06 10:25:22 +0000 |
commit | 6dd15bfabc5c655d18ea19c0d69b76ecc34630e2 (patch) | |
tree | 336c37cac9b105bec550405a11853277631c2810 | |
parent | 8010061e8af476d66a0ca6fb6d509b36acdb9b9a (diff) | |
download | pangemma-6dd15bfabc5c655d18ea19c0d69b76ecc34630e2.tar.gz |
More debugging info and raise SIGINT instead of exit
-rw-r--r-- | src/debug.cpp | 15 | ||||
-rw-r--r-- | src/debug.h | 19 | ||||
-rw-r--r-- | src/gemma.cpp | 29 | ||||
-rw-r--r-- | src/gemma.h | 19 | ||||
-rw-r--r-- | src/lapack.cpp | 25 | ||||
-rw-r--r-- | src/lmm.cpp | 122 | ||||
-rw-r--r-- | src/mathfunc.cpp | 14 | ||||
-rw-r--r-- | src/mathfunc.h | 5 | ||||
-rw-r--r-- | src/mvlmm.cpp | 15 | ||||
-rw-r--r-- | src/param.cpp | 25 | ||||
-rwxr-xr-x | test/dev_test_suite.sh | 2 |
11 files changed, 178 insertions, 112 deletions
diff --git a/src/debug.cpp b/src/debug.cpp index bc40b44..2f1f861 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -39,6 +39,7 @@ #include "mathfunc.h" static bool debug_mode = false; +static bool debug_data_mode = false; static bool debug_check = true; // check data/algorithms static bool debug_fpe_check = true; // check floating point errors (intel hardware) static bool debug_strict = false; // fail on error, more rigorous checks @@ -47,6 +48,7 @@ static uint debug_issue = 0; // track github issues static bool debug_legacy = false; // legacy mode void debug_set_debug_mode(bool setting) { debug_mode = setting; } +void debug_set_debug_data_mode(bool setting) { debug_data_mode = setting; } void debug_set_no_check_mode(bool setting) {debug_check = !setting; } void debug_set_no_fpe_check_mode(bool setting) {debug_fpe_check = !setting; } void debug_set_strict_mode(bool setting) { debug_strict = setting; } @@ -55,6 +57,7 @@ void debug_set_issue(uint issue) { debug_issue = issue; } void debug_set_legacy_mode(bool setting) { debug_legacy = setting; } bool is_debug_mode() { return debug_mode; }; +bool is_debug_data_mode() { return debug_data_mode; }; bool is_no_check_mode() { return !debug_check; }; bool is_check_mode() { return debug_check; }; bool is_fpe_check_mode() { return debug_fpe_check; }; @@ -151,7 +154,13 @@ void disable_segfpe() { #endif } +void write(const char *s, const char *msg) { + if (!is_debug_data_mode()) return; + cout << s << ": " << msg << endl; +} + void write(const gsl_vector *v, const char *msg) { + if (!is_debug_data_mode()) return; if (msg) cout << "// " << msg << endl; cout << "// vector size: " << v->size << endl; cout << "double " << msg << "[] = {"; @@ -162,6 +171,7 @@ void write(const gsl_vector *v, const char *msg) { } void write(const gsl_matrix *m, const char *msg) { + if (!is_debug_data_mode()) return; if (msg) cout << "// " << msg << endl; // Matrices are stored in row-major order, meaning that each row of // elements forms a contiguous block in memory. This is the standard @@ -169,8 +179,9 @@ void write(const gsl_matrix *m, const char *msg) { // rows is size1. auto rows = m->size1; // see https://www.gnu.org/software/gsl/manual/html_node/Matrices.html#Matrices auto cols = m->size2; + auto tda = m->tda; - cout << "// matrix size: " << cols << " cols, " << rows << " rows" << endl; + cout << "// matrix size: " << cols << " cols, " << rows << " rows," << tda << " tda" << endl; cout << "double " << msg << "[] = {"; for (size_t row=0; row < rows; row++) { for (size_t col=0; col < cols; col++) { @@ -211,6 +222,7 @@ void do_gsl_matrix_safe_free (gsl_matrix *m, const char *__pretty_function, cons bool has_NaN = has_nan(m); bool has_Inf = has_inf(m); if (has_NaN || has_Inf) { + write(m); std::string msg = "Matrix (size "; msg += std::to_string(m->size1); msg += "x"; @@ -236,6 +248,7 @@ void do_gsl_vector_safe_free (gsl_vector *v, const char *__pretty_function, cons bool has_NaN = has_nan(v); bool has_Inf = has_inf(v); if (has_NaN || has_Inf) { + write(v); std::string msg = "Vector (size "; msg += std::to_string(v->size); msg += ")"; diff --git a/src/debug.h b/src/debug.h index 3133963..e83c813 100644 --- a/src/debug.h +++ b/src/debug.h @@ -23,6 +23,7 @@ #include <assert.h> #include <iostream> +#include <csignal> #include "gsl/gsl_matrix.h" @@ -31,6 +32,7 @@ void gemma_gsl_error_handler (const char * reason, int line, int gsl_errno); void debug_set_debug_mode(bool setting); +void debug_set_debug_data_mode(bool setting); void debug_set_no_check_mode(bool setting); void debug_set_no_fpe_check_mode(bool setting); void debug_set_strict_mode(bool setting); @@ -39,6 +41,7 @@ void debug_set_issue(uint issue); void debug_set_legacy_mode(bool setting); bool is_debug_mode(); +bool is_debug_data_mode(); bool is_no_check_mode(); bool is_check_mode(); bool is_fpe_check_mode(); @@ -54,8 +57,9 @@ void disable_segfpe(); { auto x = m * n; \ enforce_msg(x / m == n, "multiply integer overflow"); } -void write(const gsl_vector *v, const char *msg); -void write(const gsl_matrix *m, const char *msg); +void write(const char *s, const char *msg = ""); +void write(const gsl_vector *v, const char *msg = ""); +void write(const gsl_matrix *m, const char *msg = ""); gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols); int gsl_matrix_safe_memcpy (gsl_matrix *dest, const gsl_matrix *src); @@ -89,12 +93,12 @@ inline void warnfail_at_msg(bool strict, const char *__function, const char *__f std::cerr << "**** WARNING: "; std::cerr << msg << " in " << __file << " at line " << __line << " in " << __function << std::endl; if (strict) - exit(1); + std::raise(SIGINT); // keep stack trace for gdb } inline void fail_at_msg(const char *__file, int __line, std::string msg) { std::cerr << "**** FAILED: " << msg << " in " << __file << " at line " << __line << std::endl; - exit(1); + std::raise(SIGINT); // keep stack trace for gdb } # ifndef __ASSERT_VOID_CAST @@ -103,12 +107,12 @@ inline void fail_at_msg(const char *__file, int __line, std::string msg) { inline void fail_msg(const char *msg) { std::cerr << "**** FAILED: " << msg << std::endl; - exit(5); + std::raise(SIGINT); // keep stack trace for gdb } inline void fail_msg(std::string msg) { std::cerr << "**** FAILED: " << msg << std::endl; - exit(5); + std::raise(SIGINT); // keep stack trace for gdb } #if defined NDEBUG @@ -138,7 +142,8 @@ inline void __enforce_fail(const char *__assertion, const char *__file, const char *__function) { std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __function << std::endl; - exit(1); + std::raise(SIGINT); // keep stack trace for gdb + // exit(1); } #define enforce(expr) \ diff --git a/src/gemma.cpp b/src/gemma.cpp index 7d12055..cb01ee0 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -48,6 +48,7 @@ extern "C" { #include "bslmm.h" #include "bslmmdap.h" +#include <csignal> // for gsl_error_handler #include "gemma.h" #include "gemma_io.h" #include "lapack.h" @@ -64,23 +65,6 @@ extern "C" { using namespace std; -// OPTIONS -// ------- -// gk: 21-22 -// gs: 25-26 -// gq: 27-28 -// eigen: 31-32 -// lmm: 1-5 -// bslmm: 11-15 -// predict: 41-43 -// lm: 51 -// vc: 61 -// ci: 66-67 -// calccor: 71 -// gw: 72 - -enum M_MODE { M_LMM1=1, M_LMM2=2, M_LMM3=3, M_LMM4=4, M_LMM5=5, M_KIN=21, M_KIN2=22, M_EIGEN=31 }; - GEMMA::GEMMA(void) : version(GEMMA_VERSION), date(GEMMA_DATE), year(GEMMA_YEAR) {} void gemma_gsl_error_handler (const char * reason, @@ -88,7 +72,7 @@ void gemma_gsl_error_handler (const char * reason, int line, int gsl_errno) { cerr << "GSL ERROR: " << reason << " in " << file << " at line " << line << " errno " << gsl_errno <<endl; - exit(22); + std::raise(SIGINT); // keep the stack trace for gdb } #if defined(OPENBLAS) && !defined(OPENBLAS_LEGACY) @@ -737,6 +721,7 @@ void GEMMA::PrintHelp(size_t option) { cout << " -strict strict mode will stop when there is a problem" << endl; cout << " -silence silent terminal display" << endl; cout << " -debug debug output" << endl; + cout << " -debug-data debug data output" << endl; cout << " -nind [num] read up to num individuals" << endl; cout << " -issue [num] enable tests relevant to issue tracker" << endl; cout << " -legacy run gemma in legacy mode" << endl; @@ -1611,6 +1596,10 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { str.clear(); str.assign(argv[i]); cPar.window_ns = atoi(str.c_str()); + } else if (strcmp(argv[i], "-debug-data") == 0) { + // cPar.mode_debug = true; + debug_set_debug_data_mode(true); + debug_set_debug_mode(true); } else if (strcmp(argv[i], "-debug") == 0) { // cPar.mode_debug = true; debug_set_debug_mode(true); @@ -1780,7 +1769,7 @@ void GEMMA::BatchRun(PARAM &cPar) { cout << "Start Eigen-Decomposition..." << endl; time_start = clock(); cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); - write(eval,"eval zeroed"); + // write(eval,"eval zeroed"); cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0); // calculate UtW and Uty @@ -2625,7 +2614,7 @@ void GEMMA::BatchRun(PARAM &cPar) { } else { cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0); } - write(eval,"eval"); + // write(eval,"eval"); if (!cPar.file_weight.empty()) { double wi; diff --git a/src/gemma.h b/src/gemma.h index 4deab51..67a7f58 100644 --- a/src/gemma.h +++ b/src/gemma.h @@ -25,6 +25,25 @@ using namespace std; +// OPTIONS +// ------- +// gk: 21-22 +// gs: 25-26 +// gq: 27-28 +// eigen: 31-32 +// lmm: 1-5 +// bslmm: 11-15 +// predict: 41-43 +// lm: 51 +// vc: 61 +// ci: 66-67 +// calccor: 71 +// gw: 72 + +enum M_MODE { M_LMM1=1, M_LMM2=2, M_LMM3=3, M_LMM4=4, M_LMM5=5, + M_BSLMM5=15, + M_KIN=21, M_KIN2=22, M_EIGEN=31 }; + class GEMMA { public: diff --git a/src/lapack.cpp b/src/lapack.cpp index 165a82d..125e5a0 100644 --- a/src/lapack.cpp +++ b/src/lapack.cpp @@ -272,12 +272,14 @@ double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval, } d /= (double)eval->size; if (count_zero_eigenvalues > 1) { + write(eval,"eigenvalues"); std::string msg = "Matrix G has "; msg += std::to_string(count_zero_eigenvalues); msg += " eigenvalues close to zero"; warning_msg(msg); } if (count_negative_eigenvalues > 0) { + write(eval,"eigenvalues"); warning_msg("Matrix G has more than one negative eigenvalues!"); } @@ -299,13 +301,23 @@ double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) { // LU decomposition. void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) { + // debug_msg("entering"); enforce_gsl(gsl_linalg_LU_decomp(LU, p, signum)); return; } // LU invert. Returns inverse. Note that GSL does not recommend using // this function + +// These functions compute the inverse of a matrix A from its LU +// decomposition (LU,p), storing the result in the matrix inverse. The +// inverse is computed by solving the system A x = b for each column +// of the identity matrix. It is preferable to avoid direct use of the +// inverse whenever possible, as the linear solver functions can +// obtain the same result more efficiently and reliably (consult any +// introductory textbook on numerical linear algebra for details). void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse) { + // debug_msg("entering"); auto det = LULndet(LU); assert(det != 1.0); @@ -313,13 +325,24 @@ void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_in } // LU lndet. + +// These functions compute the logarithm of the absolute value of the +// determinant of a matrix A, \ln|\det(A)|, from its LU decomposition, +// LU. This function may be useful if the direct computation of the +// determinant would overflow or underflow. + double LULndet(const gsl_matrix *LU) { - return gsl_linalg_LU_lndet((gsl_matrix *)LU); + // debug_msg("entering"); + + double res = gsl_linalg_LU_lndet((gsl_matrix *)LU); + enforce_msg(!is_inf(res), "LU determinant is zero -> LU is not invertable"); + return res; } // LU solve. void LUSolve(const gsl_matrix *LU, const gsl_permutation *p, const gsl_vector *b, gsl_vector *x) { + // debug_msg("entering"); enforce_gsl(gsl_linalg_LU_solve(LU, p, b, x)); return; } diff --git a/src/lmm.cpp b/src/lmm.cpp index 0fa8ea5..d08c90e 100644 --- a/src/lmm.cpp +++ b/src/lmm.cpp @@ -292,7 +292,7 @@ void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, double p_ab = 0.0; - debug_msg("CalcPab"); + write("CalcPab"); write(Hi_eval,"Hi_eval"); write(Uab,"Uab"); // write(ab,"ab"); @@ -305,19 +305,19 @@ void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, for (size_t b = a; b <= n_cvt + 2; ++b) { // b in a..rest size_t index_ab = GetabIndex(a, b, n_cvt); // index in top half matrix, see above if (p == 0) { // fills row 0 for each a,b using dot product of Hi_eval . Uab(a) - cout << "p is 0 " << index_ab; // walk row 0 + // cout << "p is 0 " << index_ab; // walk row 0 gsl_vector_const_view Uab_col = gsl_matrix_const_column(Uab, index_ab); // get the column gsl_blas_ddot(Hi_eval, &Uab_col.vector, &p_ab); // dot product with H_eval if (e_mode != 0) { // if not null model (defunct right now) if (! is_legacy_mode()) assert(false); // disabling to see when it is used; allow with legacy mode p_ab = gsl_vector_get(unused, index_ab) - p_ab; // was ab } - cout << p << "r," << index_ab << ":" << p_ab << endl; + // cout << p << "r, index_ab " << index_ab << ":" << p_ab << endl; gsl_matrix_set(Pab, 0, index_ab, p_ab); write(Pab,"Pab int"); } else { // walk the rest of the upper triangle of the matrix (row 1..n). Cols jump with 2 at a time - cout << "a" << a << "b" << b << "p" << p << "n_cvt" << n_cvt << endl; + // cout << "a" << a << "b" << b << "p" << p << "n_cvt" << n_cvt << endl; write(Pab,"Pab int"); size_t index_aw = GetabIndex(a, p, n_cvt); size_t index_bw = GetabIndex(b, p, n_cvt); @@ -329,20 +329,22 @@ void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval, double ps_bw = gsl_matrix_safe_get(Pab, p - 1, index_bw); double ps_ww = gsl_matrix_safe_get(Pab, p - 1, index_ww); - cout << "unsafe " << p-1 << "," << index_ww << ":" << gsl_matrix_get(Pab,p-1,index_ww) << endl; + // cout << "unsafe " << p-1 << "," << index_ww << ":" << gsl_matrix_get(Pab,p-1,index_ww) << endl; // if (is_check_mode() || is_debug_mode()) assert(ps_ww != 0.0); if (ps_ww != 0) p_ab = ps_ab - ps_aw * ps_bw / ps_ww; + else + p_ab = ps_ab; - cout << "set " << p << "r," << index_ab << "c: " << p_ab << endl; + // cout << "set " << p << "r, index_ab " << index_ab << "c: " << p_ab << endl; gsl_matrix_set(Pab, p, index_ab, p_ab); } } } } write(Pab,"Pab"); - if (is_strict_mode() && (has_nan(Uab) || has_nan(Pab) || has_nan(Hi_eval))) - exit(2); + // if (is_strict_mode() && (has_nan(Uab) || has_nan(Pab) || has_nan(Hi_eval))) + // exit(2); return; } @@ -353,7 +355,7 @@ void CalcPPab(const size_t n_cvt, const size_t e_mode, double p2_ab; double ps2_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww; - debug_msg("CalcPPab"); + write("CalcPPab"); write(HiHi_eval,"Hi_eval"); write(Uab,"Uab"); // write(ab,"ab"); @@ -390,6 +392,9 @@ void CalcPPab(const size_t n_cvt, const size_t e_mode, p2_ab = ps2_ab + ps_aw * ps_bw * ps2_ww / (ps_ww * ps_ww); p2_ab -= (ps_aw * ps2_bw + ps_bw * ps2_aw) / ps_ww; } + else { + p2_ab = ps2_ab; + } gsl_matrix_set(PPab, p, index_ab, p2_ab); } @@ -397,8 +402,8 @@ void CalcPPab(const size_t n_cvt, const size_t e_mode, } } write(PPab,"PPab"); - if (is_strict_mode() && (has_nan(Uab) || has_nan(PPab) || has_nan(HiHi_eval))) - exit(2); + // if (is_strict_mode() && (has_nan(Uab) || has_nan(PPab) || has_nan(HiHi_eval))) + // exit(2); return; } @@ -410,7 +415,7 @@ void CalcPPPab(const size_t n_cvt, const size_t e_mode, double p3_ab; double ps3_ab, ps_aw, ps_bw, ps_ww, ps2_aw, ps2_bw, ps2_ww, ps3_aw, ps3_bw, ps3_ww; - debug_msg("CalcPPPab"); + write("CalcPPPab"); write(HiHiHi_eval,"HiHiHi_eval"); write(Uab,"Uab"); @@ -454,14 +459,17 @@ void CalcPPPab(const size_t n_cvt, const size_t e_mode, ps_aw * ps_bw * ps3_ww) / (ps_ww * ps_ww); } + else { + p3_ab = ps3_ab; + } gsl_matrix_set(PPPab, p, index_ab, p3_ab); } } } } write(PPPab,"PPPab"); - if (is_strict_mode() && (has_nan(Uab) || has_nan(PPPab) || has_nan(HiHiHi_eval))) - exit(2); + // if (is_strict_mode() && (has_nan(Uab) || has_nan(PPPab) || has_nan(HiHiHi_eval))) + // exit(2); return; } @@ -497,27 +505,27 @@ double LogL_f(double l, void *params) { for (size_t i = 0; i < (p->eval)->size; ++i) { d = gsl_vector_get(v_temp, i); - logdet_h += log(fabs(d)); + logdet_h += safe_log(fabs(d)); } CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); double c = - 0.5 * (double)ni_test * (log((double)ni_test) - log(2 * M_PI) - 1.0); + 0.5 * (double)ni_test * (safe_log((double)ni_test) - safe_log(2 * M_PI) - 1.0); index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_yy); if (is_check_mode() || is_debug_mode()) { - cerr << "P_yy is" << P_yy << endl; + // cerr << "P_yy is" << P_yy << endl; assert(!is_nan(P_yy)); assert(P_yy > 0); } - f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * log(P_yy); + f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * safe_log(P_yy); if (is_check_mode() || is_debug_mode()) { assert(!is_nan(f)); } - gsl_matrix_safe_free(Pab); // FIXME + gsl_matrix_free(Pab); // FIXME gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(v_temp); return f; @@ -610,8 +618,8 @@ $8 = 6 double yPKPy = (P_yy - PP_yy) / l; dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy; - gsl_matrix_safe_free(Pab); // FIXME: may contain NaN - gsl_matrix_safe_free(PPab); // FIXME: may contain NaN + gsl_matrix_free(Pab); // FIXME: may contain NaN + gsl_matrix_free(PPab); // FIXME: may contain NaN gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(HiHi_eval); gsl_vector_safe_free(v_temp); @@ -685,9 +693,9 @@ double LogL_dev2(double l, void *params) { 0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy); - gsl_matrix_safe_free(Pab); // FIXME - gsl_matrix_safe_free(PPab); - gsl_matrix_safe_free(PPPab); + gsl_matrix_free(Pab); // FIXME + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(HiHi_eval); gsl_vector_safe_free(HiHiHi_eval); @@ -765,9 +773,9 @@ void LogL_dev12(double l, void *params, double *dev1, double *dev2) { 0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy); - gsl_matrix_safe_free(Pab); // FIXME: may contain NaN - gsl_matrix_safe_free(PPab); // FIXME: may contain NaN - gsl_matrix_safe_free(PPPab); // FIXME: may contain NaN + gsl_matrix_free(Pab); // FIXME: may contain NaN + gsl_matrix_free(PPab); // FIXME: may contain NaN + gsl_matrix_free(PPPab); // FIXME: may contain NaN gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(HiHi_eval); gsl_vector_safe_free(HiHiHi_eval); @@ -812,7 +820,7 @@ double LogRL_f(double l, void *params) { for (size_t i = 0; i < (p->eval)->size; ++i) { d = gsl_vector_get(v_temp, i); - logdet_h += log(fabs(d)); + logdet_h += safe_log(fabs(d)); } CalcPab(n_cvt, p->e_mode, Hi_eval, p->Uab, p->ab, Pab); @@ -824,18 +832,18 @@ double LogRL_f(double l, void *params) { for (size_t i = 0; i < nc_total; ++i) { index_ww = GetabIndex(i + 1, i + 1, n_cvt); d = gsl_matrix_safe_get(Pab, i, index_ww); - logdet_hiw += log(d); + logdet_hiw += safe_log(d); d = gsl_matrix_safe_get(Iab, i, index_ww); - logdet_hiw -= log(d); + logdet_hiw -= safe_log(d); } index_ww = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt); double P_yy = gsl_matrix_safe_get(Pab, nc_total, index_ww); - double c = 0.5 * df * (log(df) - log(2 * M_PI) - 1.0); - f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * log(P_yy); + double c = 0.5 * df * (safe_log(df) - safe_log(2 * M_PI) - 1.0); + f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * safe_log(P_yy); - gsl_matrix_safe_free(Pab); - gsl_matrix_safe_free(Iab); + gsl_matrix_free(Pab); + gsl_matrix_free(Iab); // contains NaN gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(v_temp); return f; @@ -911,8 +919,8 @@ double LogRL_dev1(double l, void *params) { dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy; - gsl_matrix_safe_free(Pab); // FIXME: may contain NaN - gsl_matrix_safe_free(PPab); // FIXME: may contain NaN + gsl_matrix_free(Pab); // FIXME: may contain NaN + gsl_matrix_free(PPab); // FIXME: may contain NaN gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(HiHi_eval); gsl_vector_safe_free(v_temp); @@ -999,9 +1007,9 @@ double LogRL_dev2(double l, void *params) { dev2 = 0.5 * trace_PKPK - 0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy); - gsl_matrix_safe_free(Pab); // FIXME - gsl_matrix_safe_free(PPab); - gsl_matrix_safe_free(PPPab); + gsl_matrix_free(Pab); // FIXME + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(HiHi_eval); gsl_vector_safe_free(HiHiHi_eval); @@ -1091,9 +1099,9 @@ void LogRL_dev12(double l, void *params, double *dev1, double *dev2) { *dev2 = 0.5 * trace_PKPK - 0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy); - gsl_matrix_safe_free(Pab); // FIXME - gsl_matrix_safe_free(PPab); - gsl_matrix_safe_free(PPPab); + gsl_matrix_free(Pab); // FIXME + gsl_matrix_free(PPab); + gsl_matrix_free(PPPab); gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(HiHi_eval); gsl_vector_safe_free(HiHiHi_eval); @@ -1138,7 +1146,7 @@ void LMM::CalcRLWald(const double &l, const FUNC_PARAM ¶ms, double &beta, se = sqrt(1.0 / (tau * P_xx)); p_wald = gsl_cdf_fdist_Q((P_yy - Px_yy) * tau, 1.0, df); - gsl_matrix_safe_free(Pab); + gsl_matrix_free(Pab); gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(v_temp); return; @@ -1182,7 +1190,7 @@ void LMM::CalcRLScore(const double &l, const FUNC_PARAM ¶ms, double &beta, p_score = gsl_cdf_fdist_Q((double)ni_test * P_xy * P_xy / (P_yy * P_xx), 1.0, df); - gsl_matrix_safe_free(Pab); + gsl_matrix_free(Pab); gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(v_temp); return; @@ -1225,7 +1233,7 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) { gsl_vector_mul(&Uab_col.vector, u_a); } - cout << "a" << a << endl; + // cout << "a" << a << endl; write(Uab,"Uab iteration"); } @@ -1440,7 +1448,7 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval, gsl_vector_safe_free(y); gsl_vector_safe_free(Uty); gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused infile.close(); infile.clear(); @@ -1626,7 +1634,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp, gsl_vector_safe_free(x_miss); gsl_vector_safe_free(Utx); gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused gsl_matrix_safe_free(Xlarge); gsl_matrix_safe_free(UtXlarge); @@ -1669,7 +1677,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval, if (strcmp(ch_ptr, "NA") != 0) { gs[i] = atof(ch_ptr); if (is_strict_mode() && gs[i] == 0.0) - enforce_is_float(std::string(__STRING(ch_ptr))); // only allow for NA and valid numbers + enforce_is_float(std::string(ch_ptr)); // only allow for NA and valid numbers } } return std::make_tuple(snp,gs); @@ -1790,7 +1798,7 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX, gsl_vector_safe_free(w); gsl_matrix_safe_free(Utw); gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused return; } @@ -1809,7 +1817,7 @@ void CalcLambda(const char func_name, FUNC_PARAM ¶ms, const double l_min, // Evaluate first-order derivates in different intervals. double lambda_l, lambda_h, - lambda_interval = log(l_max / l_min) / (double)n_region; + lambda_interval = safe_log(l_max / l_min) / (double)n_region; double dev1_l, dev1_h, logf_l, logf_h; for (size_t i = 0; i < n_region; ++i) { @@ -1972,7 +1980,7 @@ void CalcLambda(const char func_name, const gsl_vector *eval, size_t n_cvt = UtW->size2, ni_test = UtW->size1; size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2; - cout << "n_cvt " << n_cvt << ", ni_test " << ni_test << ", n_index " << n_index << endl; + // cout << "n_cvt " << n_cvt << ", ni_test " << ni_test << ", n_index " << n_index << endl; gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index); gsl_vector *ab = gsl_vector_safe_alloc(n_index); @@ -1989,7 +1997,7 @@ void CalcLambda(const char func_name, const gsl_vector *eval, CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0); gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused return; } @@ -2015,7 +2023,7 @@ void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW, pve_se = trace_G / ((trace_G * lambda + 1.0) * (trace_G * lambda + 1.0)) * se; gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused return; } @@ -2080,8 +2088,8 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW, } gsl_matrix_safe_free(Uab); - gsl_matrix_safe_free(Pab); - gsl_vector_safe_free(ab); + gsl_matrix_free(Pab); + gsl_vector_free(ab); // ab is unused gsl_vector_safe_free(Hi_eval); gsl_vector_safe_free(v_temp); gsl_matrix_safe_free(HiW); @@ -2233,7 +2241,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval, gsl_vector_safe_free(x_miss); gsl_vector_safe_free(Utx); gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused gsl_matrix_safe_free(UtW_expand); @@ -2410,7 +2418,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval, gsl_vector_safe_free(x); gsl_vector_safe_free(Utx); gsl_matrix_safe_free(Uab); - gsl_vector_safe_free(ab); + gsl_vector_free(ab); // unused gsl_matrix_safe_free(UtW_expand); diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 4708ffa..fdd6a58 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -70,27 +70,27 @@ bool has_nan(const vector<double> v) { bool has_nan(const gsl_vector *v) { for (size_t i = 0; i < v->size; ++i) - if (gsl_isnan(gsl_vector_get(v,i))) return true; + if (is_nan(gsl_vector_get(v,i))) return true; return false; } bool has_inf(const gsl_vector *v) { for (size_t i = 0; i < v->size; ++i) { auto value = gsl_vector_get(v,i); - if (gsl_isinf(value) != 0) return true; + if (is_inf(value) != 0) return true; } return false; } bool has_nan(const gsl_matrix *m) { for (size_t i = 0; i < m->size1; ++i) for (size_t j = 0; j < m->size2; ++j) - if (gsl_isnan(gsl_matrix_get(m,i,j))) return true; + if (is_nan(gsl_matrix_get(m,i,j))) return true; return false; } bool has_inf(const gsl_matrix *m) { for (size_t i = 0; i < m->size1; ++i) for (size_t j = 0; j < m->size2; ++j) { auto value = gsl_matrix_get(m,i,j); - if (gsl_isinf(value) != 0) return true; + if (is_inf(value) != 0) return true; } return false; } @@ -103,6 +103,12 @@ bool is_float(const std::string & s){ return std::regex_match(s, std::regex("^[+-]?([0-9]*[.])?[0-9]+$")); } +double safe_log(const double d) { + if (!is_legacy_mode()) + enforce_msg(d > 0.0, (std::string("Trying to take the log of ") + std::to_string(d)).c_str()); + return log(d); +} + // calculate variance of a vector double VectorVar(const gsl_vector *v) { double d, m = 0.0, m2 = 0.0; diff --git a/src/mathfunc.h b/src/mathfunc.h index 89a34e6..5b1e2ec 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -34,6 +34,9 @@ inline bool is_nan(double f) { return (std::isnan(f)); } +#define is_inf(d) gsl_isinf(d) +#define is_nan(d) gsl_isnan(d) + bool has_nan(const vector<double> v); bool has_nan(const gsl_vector *v); bool has_inf(const gsl_vector *v); @@ -42,6 +45,8 @@ bool has_inf(const gsl_matrix *m); bool is_integer(const std::string & s); +double safe_log(const double d); + double VectorVar(const gsl_vector *v); void CenterMatrix(gsl_matrix *G); void CenterMatrix(gsl_matrix *G, const gsl_vector *w); diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index 13325ab..d877302 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -41,6 +41,7 @@ #include "gzstream.h" #include "gemma_io.h" #include "lapack.h" +#include "mathfunc.h" #include "lmm.h" #include "mvlmm.h" @@ -233,7 +234,7 @@ double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l, if (d <= 0) { continue; } - logdet_Ve += log(d); + logdet_Ve += safe_log(d); gsl_vector_view U_col = gsl_matrix_column(U_l, i); d = sqrt(d); @@ -563,7 +564,7 @@ double MphCalcLogL(const gsl_vector *eval, const gsl_vector *xHiy, dl = gsl_vector_get(D_l, i); d = delta * dl + 1.0; - logl += y * y / d + log(d); + logl += y * y / d + safe_log(d); } } @@ -629,10 +630,10 @@ double MphEM(const char func_name, const size_t max_iter, const double max_prec, // Calculate the constant for logl. if (func_name == 'R' || func_name == 'r') { logl_const = - -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) + + -0.5 * (double)(n_size - c_size) * (double)d_size * safe_log(2.0 * M_PI) + 0.5 * (double)d_size * LULndet(XXt); } else { - logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI); + logl_const = -0.5 * (double)n_size * (double)d_size * safe_log(2.0 * M_PI); } // Start EM. @@ -958,7 +959,7 @@ void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X, gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i); gsl_vector_scale(&mat_row.vector, 1.0 / d); // @@ - logdet_H += log(d); + logdet_H += safe_log(d); } gsl_matrix_view Hi_k = @@ -2639,10 +2640,10 @@ double MphNR(const char func_name, const size_t max_iter, const double max_prec, // Calculate the constant for logl. if (func_name == 'R' || func_name == 'r') { logl_const = - -0.5 * (double)(n_size - c_size) * (double)d_size * log(2.0 * M_PI) + + -0.5 * (double)(n_size - c_size) * (double)d_size * safe_log(2.0 * M_PI) + 0.5 * (double)d_size * LULndet(XXt); } else { - logl_const = -0.5 * (double)n_size * (double)d_size * log(2.0 * M_PI); + logl_const = -0.5 * (double)n_size * (double)d_size * safe_log(2.0 * M_PI); } // Optimization iterations. diff --git a/src/param.cpp b/src/param.cpp index 12f4299..6537379 100644 --- a/src/param.cpp +++ b/src/param.cpp @@ -35,6 +35,7 @@ #include "gsl/gsl_vector.h" #include "eigenlib.h" +#include "gemma.h" #include "gemma_io.h" #include "mathfunc.h" #include "param.h" @@ -311,6 +312,7 @@ void PARAM::ReadFiles(void) { maf_level, miss_level, hwe_level, r2_level, mapRS2chr, mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) { error = true; + return; } gsl_matrix_free(W2); @@ -320,12 +322,7 @@ void PARAM::ReadFiles(void) { // Read genotype file for multiple PLINK files. if (!file_mbfile.empty()) { igzstream infile(file_mbfile.c_str(), igzstream::in); - if (!infile) { - cout << "error! fail to open mbfile file: " << file_mbfile << endl; - error = true; - return; - } - + enforce_msg(infile,"fail to open mbfile file"); string file_name; size_t t = 0, ns_test_tmp = 0; gsl_matrix *W3 = NULL; @@ -477,11 +474,7 @@ void PARAM::ReadFiles(void) { ni_test += indicator_idv[i]; } - if (ni_test == 0) { - cout << "error! number of analyzed individuals equals 0. " << endl; - error = true; - return; - } + enforce_msg(ni_test,"number of analyzed individuals equals 0."); } // For ridge prediction, read phenotype only. @@ -1056,6 +1049,8 @@ void PARAM::CheckData(void) { } } + enforce_msg(ni_test,"number of analyzed individuals equals 0."); + if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() && file_study.empty() && file_beta.empty() && file_bf.empty()) { error = true; @@ -1383,7 +1378,8 @@ size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt) { } size_t index = (2 * cols - a1 + 2) * (a1 - 1) / 2 + b1 - a1; - cout << "* GetabIndx " << a1 << "," << b1 << "," << cols << ":" << index << endl; + // cout << "* GetabIndx " << a1 << "," << b1 << "," << cols << ":" << index << endl; + // FIXME: should add a contract for index range return index; // return ( b < a ? ((2 * cols - b + 2) * (b - 1) / 2 + a - b ): ((2 * cols - a + 2) * (a - 1) / 2 + b - a) ); @@ -2062,10 +2058,11 @@ void PARAM::ProcessCvtPhen() { } // Check ni_test. - if (ni_test == 0 && a_mode != 15) { + if (a_mode != M_BSLMM5) + enforce_msg(ni_test,"number of analyzed individuals equals 0."); + if (ni_test == 0 && a_mode != M_BSLMM5) { error = true; cout << "error! number of analyzed individuals equals 0. " << endl; - return; } // Check covariates to see if they are correlated with each diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh index 2fcd26d..381d20a 100755 --- a/test/dev_test_suite.sh +++ b/test/dev_test_suite.sh @@ -31,7 +31,7 @@ testBXDStandardRelatednessMatrixKSingularError() { -gk \ -no-check \ -o $outn - assertEquals 22 $? # should show singular error + assertEquals 130 $? # should show singular error } testBXDStandardRelatednessMatrixK() { |