From d564a6f16613985340040cc7ab0ffc371cbce3d1 Mon Sep 17 00:00:00 2001 From: Pjotr Prins Date: Sun, 20 Aug 2017 09:20:06 +0000 Subject: Added checks for K --- src/debug.cpp | 30 ++++++++++++++ src/debug.h | 34 ++++++++++++--- src/gemma.cpp | 34 ++++++++++++++- src/io.cpp | 3 +- src/main.cpp | 8 ---- src/mathfunc.cpp | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/mathfunc.h | 6 +++ src/mvlmm.cpp | 6 +-- src/param.h | 4 +- 9 files changed, 227 insertions(+), 21 deletions(-) create mode 100644 src/debug.cpp diff --git a/src/debug.cpp b/src/debug.cpp new file mode 100644 index 0000000..4e37538 --- /dev/null +++ b/src/debug.cpp @@ -0,0 +1,30 @@ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" +#include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" + +#include "debug.h" +#include "mathfunc.h" + +// Helper function called by macro validate_K(K, check) +void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line) { + if (do_check) { + debug_msg("Validating K"); + if (!checkMatrixEigen(K)) warning_at_msg(__file,__line,"K has small or negative eigenvalues!"); + if (!isMatrixIllConditioned(K)) warning_at_msg(__file,__line,"K is ill conditioned!") if (!isMatrixSymmetric(K)) fail_at_msg(__file,__line,"K is not symmetric!" ); + if (!isMatrixPositiveDefinite(K)) fail_at_msg(__file,__line,"K is not positive definite!"); +; + } +} diff --git a/src/debug.h b/src/debug.h index 3fbe9e0..82dd245 100644 --- a/src/debug.h +++ b/src/debug.h @@ -4,26 +4,49 @@ #include #include -// enforce works like assert but also when NDEBUG is set (i.e., it -// always works). enforce_msg prints message instead of expr +#include "gsl/gsl_matrix.h" + +void gemma_gsl_error_handler (const char * reason, + const char * file, + int line, int gsl_errno); + + +// Validation routines +void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line); #define ROUND(f) round(f * 10000.)/10000 +#define validate_K(K,check) do_validate_K(K,check,__FILE__,__LINE__) + +#define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl; + +inline void fail_at_msg(const char *__file, int __line, const char *msg) { + std::cerr << "**** FAIL: " << msg << " in " << __file << " at line " << __line << std::endl; + exit(1); +} #if defined NDEBUG + +#define warning_msg(msg) cerr << "**** WARNING: " << msg << endl; #define debug_msg(msg) #define assert_issue(is_issue, expr) -#else -#define debug_msg(msg) cout << "**** DEBUG: " << msg << endl; + +#else // DEBUG + +#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl; +#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl; #define assert_issue(is_issue, expr) \ ((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0)) #endif +// enforce works like assert but also when NDEBUG is set (i.e., it +// always works). enforce_msg prints message instead of expr + /* This prints an "Assertion failed" message and aborts. */ inline void __enforce_fail(const char *__assertion, const char *__file, unsigned int __line, const char *__function) { - std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __function << std::endl; + std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __PRETTY_FUNCTION__ << std::endl; exit(1); } @@ -54,5 +77,4 @@ inline void __enforce_fail(const char *__assertion, const char *__file, : __enforce_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__, \ __LINE__, __ASSERT_FUNCTION)) - #endif diff --git a/src/gemma.cpp b/src/gemma.cpp index c82bf4a..7749bc9 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -44,11 +44,20 @@ #include "prdt.h" #include "varcov.h" #include "vc.h" +#include "debug.h" using namespace std; GEMMA::GEMMA(void) : version("0.97.1"), date("08/07/2017"), year("2017") {} +void gemma_gsl_error_handler (const char * reason, + const char * file, + int line, int gsl_errno) { + cerr << "GSL ERROR: " << reason << " in " << file + << " at line " << line << " errno " << gsl_errno < 1) { outfile << "## total pve = " << cPar.pve_total << endl; diff --git a/src/io.cpp b/src/io.cpp index 8f3b58f..fedf69a 100644 --- a/src/io.cpp +++ b/src/io.cpp @@ -39,6 +39,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#include "debug.h" #include "eigenlib.h" #include "gzstream.h" #include "io.h" @@ -1138,7 +1139,7 @@ void ReadFile_kin(const string &file_kin, vector &indicator_idv, if (j_total == ni_total) { cout << "error! number of columns in the " << "kinship file is larger than the number" - << " of phentypes for row = " << i_total << endl; + << " of phenotypes for row = " << i_total << endl; error = true; } diff --git a/src/main.cpp b/src/main.cpp index e37b154..92c4d90 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,14 +25,6 @@ using namespace std; -void gemma_gsl_error_handler (const char * reason, - const char * file, - int line, int gsl_errno) { - cerr << "GSL ERROR: " << reason << " in " << file - << " at line " << line << " errno " << gsl_errno < #include #include +#include #include #include "Eigen/Dense" + +#include "gsl/gsl_version.h" + +#if GSL_MAJOR_VERSION < 2 +#pragma message "GSL version " GSL_VERSION +#endif + #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#include "gsl/gsl_eigen.h" +#include "debug.h" #include "eigenlib.h" #include "lapack.h" #include "mathfunc.h" @@ -45,6 +55,14 @@ using namespace std; using namespace Eigen; +bool has_nan(const vector v) { + for (const auto& e: v) { + if (std::isnan(e)) + return true; + } + return false; +} + // calculate variance of a vector double VectorVar(const gsl_vector *v) { double d, m = 0.0, m2 = 0.0; @@ -187,6 +205,111 @@ double ScaleMatrix(gsl_matrix *G) { return d; } +bool isMatrixSymmetric(const gsl_matrix *G) { + enforce(G->size1 == G->size2); + auto m = G->data; + // upper triangle + auto size = G->size1; + for(auto c = 0; c < size; c++) { + for(auto r = 0; r < c; r++) { + int x1 = c, y1 = r, x2 = r, y2 = c; + auto idx1 = y1*size+x1, idx2 = y2*size+x2; + // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]); + if(m[idx1] != m[idx2]) { + cout << "Mismatch coordinates (" << c << "," << r << ")" << m[idx1] << ":" << m[idx2] << "!" << endl; + return false; + } + } + } + return true; +} + +bool isMatrixPositiveDefinite(const gsl_matrix *G) { + enforce(G->size1 == G->size2); + auto G2 = gsl_matrix_alloc(G->size1, G->size2); + enforce_gsl(gsl_matrix_memcpy(G2,G)); + auto handler = gsl_set_error_handler_off(); +#if GSL_MAJOR_VERSION >= 2 + auto s = gsl_linalg_cholesky_decomp1(G2); +#else + auto s = gsl_linalg_cholesky_decomp(G2); +#endif + gsl_set_error_handler(handler); + gsl_matrix_free(G2); + return (s == GSL_SUCCESS); +} + +gsl_vector *getEigenValues(const gsl_matrix *G) { + enforce(G->size1 == G->size2); + auto G2 = gsl_matrix_alloc(G->size1, G->size2); + enforce_gsl(gsl_matrix_memcpy(G2,G)); + auto eworkspace = gsl_eigen_symm_alloc(G->size1); + enforce(eworkspace); + gsl_vector *eigenvalues = gsl_vector_alloc(G->size1); + enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace)); + gsl_eigen_symm_free(eworkspace); + gsl_matrix_free(G2); + return eigenvalues; +} + +// Check whether eigen values are larger than *min* +// by default 1E-5. +// Returns success, eigen min, eigen max + +tuple abs_minmax(const gsl_vector *v) { + auto min = std::abs(v->data[0]); + auto max = std::abs(v->data[0]); + for (auto i=0; isize; i++) { + auto value = std::abs(v->data[i]); + if (value < min) + min = value; + if (value > max) + max = value; + } + return std::make_tuple(min, max); +} + +bool has_negative_values(const gsl_vector *v) { + for (auto i=0; isize; i++) { + if (v->data[i] < 0.0) { + return true; + } + } + return false; +} + +bool checkMatrixEigen(const gsl_matrix *G, double min) { + auto eigenvalues = getEigenValues(G); + bool ret_valid = true; + + if (has_negative_values(eigenvalues)) + ret_valid = false; + + auto t = abs_minmax(eigenvalues); + auto absmin = get<0>(t); + if (absmin < min) + ret_valid = false; + gsl_vector_free(eigenvalues); + return ret_valid; +} + +bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio) { + bool ret_valid = true; + auto eigenvalues = getEigenValues(G); + + auto t = abs_minmax(eigenvalues); + auto absmin = get<0>(t); + auto absmax = get<1>(t); + if (absmax/absmin > max_ratio) { + #if !NDEBUG + cerr << "**** DEBUG: Eigenvalues Min " << absmin << " Max " << absmax << " Ratio " << absmax/absmin << endl; + #endif + ret_valid = false; + } + gsl_vector_free(eigenvalues); + return ret_valid; +} + double SumVector(const gsl_vector *v) { double sum = 0; for (int i = 0; i < v->size; i++ ) { diff --git a/src/mathfunc.h b/src/mathfunc.h index b9f3c73..8a2ea64 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -26,12 +26,18 @@ using namespace std; using namespace Eigen; +bool has_nan(const vector v); + double VectorVar(const gsl_vector *v); void CenterMatrix(gsl_matrix *G); void CenterMatrix(gsl_matrix *G, const gsl_vector *w); void CenterMatrix(gsl_matrix *G, const gsl_matrix *W); void StandardizeMatrix(gsl_matrix *G); double ScaleMatrix(gsl_matrix *G); +bool isMatrixPositiveDefinite(const gsl_matrix *G); +bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio=4.0); +bool isMatrixSymmetric(const gsl_matrix *G); +bool checkMatrixEigen(const gsl_matrix *G, double min=1e-5); double SumVector(const gsl_vector *v); double CenterVector(gsl_vector *y); void CenterVector(gsl_vector *y, const gsl_matrix *W); diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index eb591ca..358038f 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -305,7 +305,7 @@ double CalcQi(const gsl_vector *eval, const gsl_vector *D_l, d1 = gsl_matrix_get(X, i, k); d2 = gsl_matrix_get(X, j, k); delta = gsl_vector_get(eval, k); - d += d1 * d2 / (dl * delta + 1.0); + d += d1 * d2 / (dl * delta + 1.0); // @@ } } @@ -366,7 +366,7 @@ void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l, for (size_t i = 0; i < d_size; i++) { dl = gsl_vector_get(D_l, i); - d_u = dl / (delta * dl + 1.0); + d_u = dl / (delta * dl + 1.0); // @@ d_e = delta * d_u; gsl_matrix_set(OmegaU, i, k, d_u); @@ -961,7 +961,7 @@ void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X, d = delta * dl + 1.0; gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i); - gsl_vector_scale(&mat_row.vector, 1.0 / d); + gsl_vector_scale(&mat_row.vector, 1.0 / d); // @@ logdet_H += log(d); } diff --git a/src/param.h b/src/param.h index f4d649f..249bf02 100644 --- a/src/param.h +++ b/src/param.h @@ -111,10 +111,12 @@ public: class PARAM { public: - // IO-related parameters. + // IO-related parameters + bool mode_check = true; bool mode_silence; bool mode_debug = false; uint issue; // enable tests for issue on github tracker + int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector p_column; // Which phenotype column needs analysis. -- cgit v1.2.3