diff options
-rw-r--r-- | src/debug.cpp | 30 | ||||
-rw-r--r-- | src/debug.h | 34 | ||||
-rw-r--r-- | src/gemma.cpp | 34 | ||||
-rw-r--r-- | src/io.cpp | 3 | ||||
-rw-r--r-- | src/main.cpp | 8 | ||||
-rw-r--r-- | src/mathfunc.cpp | 123 | ||||
-rw-r--r-- | src/mathfunc.h | 6 | ||||
-rw-r--r-- | src/mvlmm.cpp | 6 | ||||
-rw-r--r-- | src/param.h | 4 |
9 files changed, 227 insertions, 21 deletions
diff --git a/src/debug.cpp b/src/debug.cpp new file mode 100644 index 0000000..4e37538 --- /dev/null +++ b/src/debug.cpp @@ -0,0 +1,30 @@ + +#include <cmath> +#include <cstring> +#include <ctime> +#include <fstream> +#include <iostream> +#include <string> +#include <sys/stat.h> +#include <vector> + +#include "gsl/gsl_blas.h" +#include "gsl/gsl_cdf.h" +#include "gsl/gsl_eigen.h" +#include "gsl/gsl_linalg.h" +#include "gsl/gsl_matrix.h" +#include "gsl/gsl_vector.h" + +#include "debug.h" +#include "mathfunc.h" + +// Helper function called by macro validate_K(K, check) +void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line) { + if (do_check) { + debug_msg("Validating K"); + if (!checkMatrixEigen(K)) warning_at_msg(__file,__line,"K has small or negative eigenvalues!"); + if (!isMatrixIllConditioned(K)) warning_at_msg(__file,__line,"K is ill conditioned!") if (!isMatrixSymmetric(K)) fail_at_msg(__file,__line,"K is not symmetric!" ); + if (!isMatrixPositiveDefinite(K)) fail_at_msg(__file,__line,"K is not positive definite!"); +; + } +} diff --git a/src/debug.h b/src/debug.h index 3fbe9e0..82dd245 100644 --- a/src/debug.h +++ b/src/debug.h @@ -4,26 +4,49 @@ #include <assert.h> #include <iostream> -// enforce works like assert but also when NDEBUG is set (i.e., it -// always works). enforce_msg prints message instead of expr +#include "gsl/gsl_matrix.h" + +void gemma_gsl_error_handler (const char * reason, + const char * file, + int line, int gsl_errno); + + +// Validation routines +void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line); #define ROUND(f) round(f * 10000.)/10000 +#define validate_K(K,check) do_validate_K(K,check,__FILE__,__LINE__) + +#define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl; + +inline void fail_at_msg(const char *__file, int __line, const char *msg) { + std::cerr << "**** FAIL: " << msg << " in " << __file << " at line " << __line << std::endl; + exit(1); +} #if defined NDEBUG + +#define warning_msg(msg) cerr << "**** WARNING: " << msg << endl; #define debug_msg(msg) #define assert_issue(is_issue, expr) -#else -#define debug_msg(msg) cout << "**** DEBUG: " << msg << endl; + +#else // DEBUG + +#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl; +#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl; #define assert_issue(is_issue, expr) \ ((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0)) #endif +// enforce works like assert but also when NDEBUG is set (i.e., it +// always works). enforce_msg prints message instead of expr + /* This prints an "Assertion failed" message and aborts. */ inline void __enforce_fail(const char *__assertion, const char *__file, unsigned int __line, const char *__function) { - std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __function << std::endl; + std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __PRETTY_FUNCTION__ << std::endl; exit(1); } @@ -54,5 +77,4 @@ inline void __enforce_fail(const char *__assertion, const char *__file, : __enforce_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__, \ __LINE__, __ASSERT_FUNCTION)) - #endif diff --git a/src/gemma.cpp b/src/gemma.cpp index c82bf4a..7749bc9 100644 --- a/src/gemma.cpp +++ b/src/gemma.cpp @@ -44,11 +44,20 @@ #include "prdt.h" #include "varcov.h" #include "vc.h" +#include "debug.h" using namespace std; GEMMA::GEMMA(void) : version("0.97.1"), date("08/07/2017"), year("2017") {} +void gemma_gsl_error_handler (const char * reason, + const char * file, + int line, int gsl_errno) { + cerr << "GSL ERROR: " << reason << " in " << file + << " at line " << line << " errno " << gsl_errno <<endl; + exit(22); +} + void GEMMA::PrintHeader(void) { cout << endl; cout << "*********************************************************" << endl; @@ -247,10 +256,10 @@ void GEMMA::PrintHelp(size_t option) { << endl; cout << " to fit a multivariate linear mixed model: " << endl; cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -n " - "[num1] [num2] -o [prefix]" + "[pheno cols...] -o [prefix]" << endl; cout << " ./gemma -g [filename] -p [filename] -a [filename] -k " - "[filename] -lmm [num] -n [num1] [num2] -o [prefix]" + "[filename] -lmm [num] -n [pheno cols...] -o [prefix]" << endl; cout << " to fit a Bayesian sparse linear mixed model: " << endl; cout << " ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl; @@ -536,6 +545,7 @@ void GEMMA::PrintHelp(size_t option) { if (option == 9) { cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl; + cout << " -n [pheno cols...] - range of phenotypes" << endl; cout << " -pnr " << " specify the pvalue threshold to use the Newton-Raphson's method " "(default 0.001)" @@ -694,6 +704,7 @@ void GEMMA::PrintHelp(size_t option) { if (option == 14) { cout << " DEBUG OPTIONS" << endl; + cout << " -no-checks disable checks" << endl; cout << " -silence silent terminal display" << endl; cout << " -debug debug output" << endl; cout << " -nind [num] read up to num individuals" << endl; @@ -1578,6 +1589,8 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) { cPar.window_ns = atoi(str.c_str()); } else if (strcmp(argv[i], "-debug") == 0) { cPar.mode_debug = true; + } else if (strcmp(argv[i], "-no-checks") == 0) { + cPar.mode_check = false; } else { cout << "error! unrecognized option: " << argv[i] << endl; cPar.error = true; @@ -1713,6 +1726,7 @@ void GEMMA::BatchRun(PARAM &cPar) { cout << "error! fail to read kinship/relatedness file. " << endl; return; } + // FIXME: this is strange, why read twice? ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode, cPar.error, G_full); if (cPar.error == true) { @@ -1723,6 +1737,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // center matrix G CenterMatrix(G); CenterMatrix(G_full); + validate_K(G,cPar.mode_check); // eigen-decomposition and calculate trace_G cout << "Start Eigen-Decomposition..." << endl; @@ -1869,6 +1884,9 @@ void GEMMA::BatchRun(PARAM &cPar) { return; } + // Now we have the Kinship matrix test it + validate_K(G,cPar.mode_check); + if (cPar.a_mode == 21) { cPar.WriteMatrix(G, "cXX"); } else { @@ -2146,6 +2164,8 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); + assert(!has_nan(cPar.v_se_pve)); + // if LDSC weights, then compute the weights and run the above steps again if (cPar.a_mode == 62) { // compute the weights and normalize the weights for A @@ -2175,8 +2195,10 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); + assert(!has_nan(cPar.v_se_pve)); } + gsl_vector_set(s, cPar.n_vc, cPar.ni_test); cPar.WriteMatrix(S, "S"); @@ -2265,6 +2287,7 @@ void GEMMA::BatchRun(PARAM &cPar) { cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); + assert(!has_nan(cPar.v_se_pve)); gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc); gsl_vector_memcpy(&s_sub.vector, s_ref); @@ -2325,6 +2348,7 @@ void GEMMA::BatchRun(PARAM &cPar) { // center matrix G CenterMatrix(G); + validate_K(G,cPar.mode_check); (cPar.v_traceG).clear(); double d = 0; @@ -2636,6 +2660,7 @@ return;} CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich); + assert(!has_nan(cPar.v_se_pve)); // write files // cPar.WriteMatrix (XWz, "XWz"); @@ -2692,6 +2717,7 @@ return;} // center matrix G CenterMatrix(G); + validate_K(G,cPar.mode_check); // is residual weights are provided, then if (!cPar.file_weight.empty()) { @@ -3007,6 +3033,7 @@ return;} // center matrix G CenterMatrix(G); + validate_K(G,cPar.mode_check); } else { cPar.ReadGenotypes(UtX, G, true); } @@ -3123,6 +3150,8 @@ return;} // center matrix G CenterMatrix(G); + validate_K(G,cPar.mode_check); + } else { cPar.ReadGenotypes(UtX, G, true); } @@ -3329,6 +3358,7 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) { outfile << " " << cPar.v_se_pve[i]; } outfile << endl; + assert(!has_nan(cPar.v_se_pve)); if (cPar.n_vc > 1) { outfile << "## total pve = " << cPar.pve_total << endl; @@ -39,6 +39,7 @@ #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#include "debug.h" #include "eigenlib.h" #include "gzstream.h" #include "io.h" @@ -1138,7 +1139,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv, if (j_total == ni_total) { cout << "error! number of columns in the " << "kinship file is larger than the number" - << " of phentypes for row = " << i_total << endl; + << " of phenotypes for row = " << i_total << endl; error = true; } diff --git a/src/main.cpp b/src/main.cpp index e37b154..92c4d90 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -25,14 +25,6 @@ using namespace std; -void gemma_gsl_error_handler (const char * reason, - const char * file, - int line, int gsl_errno) { - cerr << "GSL ERROR: " << reason << " in " << file - << " at line " << line << " errno " << gsl_errno <<endl; - exit(22); -} - int main(int argc, char *argv[]) { GEMMA cGemma; PARAM cPar; diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 9a4bb8b..3b70102 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -29,15 +29,25 @@ #include <stdio.h> #include <stdlib.h> #include <string> +#include <tuple> #include <vector> #include "Eigen/Dense" + +#include "gsl/gsl_version.h" + +#if GSL_MAJOR_VERSION < 2 +#pragma message "GSL version " GSL_VERSION +#endif + #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_linalg.h" #include "gsl/gsl_matrix.h" #include "gsl/gsl_vector.h" +#include "gsl/gsl_eigen.h" +#include "debug.h" #include "eigenlib.h" #include "lapack.h" #include "mathfunc.h" @@ -45,6 +55,14 @@ using namespace std; using namespace Eigen; +bool has_nan(const vector<double> v) { + for (const auto& e: v) { + if (std::isnan(e)) + return true; + } + return false; +} + // calculate variance of a vector double VectorVar(const gsl_vector *v) { double d, m = 0.0, m2 = 0.0; @@ -187,6 +205,111 @@ double ScaleMatrix(gsl_matrix *G) { return d; } +bool isMatrixSymmetric(const gsl_matrix *G) { + enforce(G->size1 == G->size2); + auto m = G->data; + // upper triangle + auto size = G->size1; + for(auto c = 0; c < size; c++) { + for(auto r = 0; r < c; r++) { + int x1 = c, y1 = r, x2 = r, y2 = c; + auto idx1 = y1*size+x1, idx2 = y2*size+x2; + // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]); + if(m[idx1] != m[idx2]) { + cout << "Mismatch coordinates (" << c << "," << r << ")" << m[idx1] << ":" << m[idx2] << "!" << endl; + return false; + } + } + } + return true; +} + +bool isMatrixPositiveDefinite(const gsl_matrix *G) { + enforce(G->size1 == G->size2); + auto G2 = gsl_matrix_alloc(G->size1, G->size2); + enforce_gsl(gsl_matrix_memcpy(G2,G)); + auto handler = gsl_set_error_handler_off(); +#if GSL_MAJOR_VERSION >= 2 + auto s = gsl_linalg_cholesky_decomp1(G2); +#else + auto s = gsl_linalg_cholesky_decomp(G2); +#endif + gsl_set_error_handler(handler); + gsl_matrix_free(G2); + return (s == GSL_SUCCESS); +} + +gsl_vector *getEigenValues(const gsl_matrix *G) { + enforce(G->size1 == G->size2); + auto G2 = gsl_matrix_alloc(G->size1, G->size2); + enforce_gsl(gsl_matrix_memcpy(G2,G)); + auto eworkspace = gsl_eigen_symm_alloc(G->size1); + enforce(eworkspace); + gsl_vector *eigenvalues = gsl_vector_alloc(G->size1); + enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace)); + gsl_eigen_symm_free(eworkspace); + gsl_matrix_free(G2); + return eigenvalues; +} + +// Check whether eigen values are larger than *min* +// by default 1E-5. +// Returns success, eigen min, eigen max + +tuple<double, double> abs_minmax(const gsl_vector *v) { + auto min = std::abs(v->data[0]); + auto max = std::abs(v->data[0]); + for (auto i=0; i<v->size; i++) { + auto value = std::abs(v->data[i]); + if (value < min) + min = value; + if (value > max) + max = value; + } + return std::make_tuple(min, max); +} + +bool has_negative_values(const gsl_vector *v) { + for (auto i=0; i<v->size; i++) { + if (v->data[i] < 0.0) { + return true; + } + } + return false; +} + +bool checkMatrixEigen(const gsl_matrix *G, double min) { + auto eigenvalues = getEigenValues(G); + bool ret_valid = true; + + if (has_negative_values(eigenvalues)) + ret_valid = false; + + auto t = abs_minmax(eigenvalues); + auto absmin = get<0>(t); + if (absmin < min) + ret_valid = false; + gsl_vector_free(eigenvalues); + return ret_valid; +} + +bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio) { + bool ret_valid = true; + auto eigenvalues = getEigenValues(G); + + auto t = abs_minmax(eigenvalues); + auto absmin = get<0>(t); + auto absmax = get<1>(t); + if (absmax/absmin > max_ratio) { + #if !NDEBUG + cerr << "**** DEBUG: Eigenvalues Min " << absmin << " Max " << absmax << " Ratio " << absmax/absmin << endl; + #endif + ret_valid = false; + } + gsl_vector_free(eigenvalues); + return ret_valid; +} + double SumVector(const gsl_vector *v) { double sum = 0; for (int i = 0; i < v->size; i++ ) { diff --git a/src/mathfunc.h b/src/mathfunc.h index b9f3c73..8a2ea64 100644 --- a/src/mathfunc.h +++ b/src/mathfunc.h @@ -26,12 +26,18 @@ using namespace std; using namespace Eigen; +bool has_nan(const vector<double> v); + double VectorVar(const gsl_vector *v); void CenterMatrix(gsl_matrix *G); void CenterMatrix(gsl_matrix *G, const gsl_vector *w); void CenterMatrix(gsl_matrix *G, const gsl_matrix *W); void StandardizeMatrix(gsl_matrix *G); double ScaleMatrix(gsl_matrix *G); +bool isMatrixPositiveDefinite(const gsl_matrix *G); +bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio=4.0); +bool isMatrixSymmetric(const gsl_matrix *G); +bool checkMatrixEigen(const gsl_matrix *G, double min=1e-5); double SumVector(const gsl_vector *v); double CenterVector(gsl_vector *y); void CenterVector(gsl_vector *y, const gsl_matrix *W); diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp index eb591ca..358038f 100644 --- a/src/mvlmm.cpp +++ b/src/mvlmm.cpp @@ -305,7 +305,7 @@ double CalcQi(const gsl_vector *eval, const gsl_vector *D_l, d1 = gsl_matrix_get(X, i, k); d2 = gsl_matrix_get(X, j, k); delta = gsl_vector_get(eval, k); - d += d1 * d2 / (dl * delta + 1.0); + d += d1 * d2 / (dl * delta + 1.0); // @@ } } @@ -366,7 +366,7 @@ void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l, for (size_t i = 0; i < d_size; i++) { dl = gsl_vector_get(D_l, i); - d_u = dl / (delta * dl + 1.0); + d_u = dl / (delta * dl + 1.0); // @@ d_e = delta * d_u; gsl_matrix_set(OmegaU, i, k, d_u); @@ -961,7 +961,7 @@ void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X, d = delta * dl + 1.0; gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i); - gsl_vector_scale(&mat_row.vector, 1.0 / d); + gsl_vector_scale(&mat_row.vector, 1.0 / d); // @@ logdet_H += log(d); } diff --git a/src/param.h b/src/param.h index f4d649f..249bf02 100644 --- a/src/param.h +++ b/src/param.h @@ -111,10 +111,12 @@ public: class PARAM { public: - // IO-related parameters. + // IO-related parameters + bool mode_check = true; bool mode_silence; bool mode_debug = false; uint issue; // enable tests for issue on github tracker + int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value; vector<size_t> p_column; // Which phenotype column needs analysis. |