aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/debug.cpp30
-rw-r--r--src/debug.h34
-rw-r--r--src/gemma.cpp34
-rw-r--r--src/io.cpp3
-rw-r--r--src/main.cpp8
-rw-r--r--src/mathfunc.cpp123
-rw-r--r--src/mathfunc.h6
-rw-r--r--src/mvlmm.cpp6
-rw-r--r--src/param.h4
9 files changed, 227 insertions, 21 deletions
diff --git a/src/debug.cpp b/src/debug.cpp
new file mode 100644
index 0000000..4e37538
--- /dev/null
+++ b/src/debug.cpp
@@ -0,0 +1,30 @@
+
+#include <cmath>
+#include <cstring>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <sys/stat.h>
+#include <vector>
+
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+
+#include "debug.h"
+#include "mathfunc.h"
+
+// Helper function called by macro validate_K(K, check)
+void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line) {
+ if (do_check) {
+ debug_msg("Validating K");
+ if (!checkMatrixEigen(K)) warning_at_msg(__file,__line,"K has small or negative eigenvalues!");
+ if (!isMatrixIllConditioned(K)) warning_at_msg(__file,__line,"K is ill conditioned!") if (!isMatrixSymmetric(K)) fail_at_msg(__file,__line,"K is not symmetric!" );
+ if (!isMatrixPositiveDefinite(K)) fail_at_msg(__file,__line,"K is not positive definite!");
+;
+ }
+}
diff --git a/src/debug.h b/src/debug.h
index 3fbe9e0..82dd245 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -4,26 +4,49 @@
#include <assert.h>
#include <iostream>
-// enforce works like assert but also when NDEBUG is set (i.e., it
-// always works). enforce_msg prints message instead of expr
+#include "gsl/gsl_matrix.h"
+
+void gemma_gsl_error_handler (const char * reason,
+ const char * file,
+ int line, int gsl_errno);
+
+
+// Validation routines
+void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line);
#define ROUND(f) round(f * 10000.)/10000
+#define validate_K(K,check) do_validate_K(K,check,__FILE__,__LINE__)
+
+#define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl;
+
+inline void fail_at_msg(const char *__file, int __line, const char *msg) {
+ std::cerr << "**** FAIL: " << msg << " in " << __file << " at line " << __line << std::endl;
+ exit(1);
+}
#if defined NDEBUG
+
+#define warning_msg(msg) cerr << "**** WARNING: " << msg << endl;
#define debug_msg(msg)
#define assert_issue(is_issue, expr)
-#else
-#define debug_msg(msg) cout << "**** DEBUG: " << msg << endl;
+
+#else // DEBUG
+
+#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl;
+#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl;
#define assert_issue(is_issue, expr) \
((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0))
#endif
+// enforce works like assert but also when NDEBUG is set (i.e., it
+// always works). enforce_msg prints message instead of expr
+
/* This prints an "Assertion failed" message and aborts. */
inline void __enforce_fail(const char *__assertion, const char *__file,
unsigned int __line,
const char *__function)
{
- std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __function << std::endl;
+ std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __PRETTY_FUNCTION__ << std::endl;
exit(1);
}
@@ -54,5 +77,4 @@ inline void __enforce_fail(const char *__assertion, const char *__file,
: __enforce_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__, \
__LINE__, __ASSERT_FUNCTION))
-
#endif
diff --git a/src/gemma.cpp b/src/gemma.cpp
index c82bf4a..7749bc9 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -44,11 +44,20 @@
#include "prdt.h"
#include "varcov.h"
#include "vc.h"
+#include "debug.h"
using namespace std;
GEMMA::GEMMA(void) : version("0.97.1"), date("08/07/2017"), year("2017") {}
+void gemma_gsl_error_handler (const char * reason,
+ const char * file,
+ int line, int gsl_errno) {
+ cerr << "GSL ERROR: " << reason << " in " << file
+ << " at line " << line << " errno " << gsl_errno <<endl;
+ exit(22);
+}
+
void GEMMA::PrintHeader(void) {
cout << endl;
cout << "*********************************************************" << endl;
@@ -247,10 +256,10 @@ void GEMMA::PrintHelp(size_t option) {
<< endl;
cout << " to fit a multivariate linear mixed model: " << endl;
cout << " ./gemma -bfile [prefix] -k [filename] -lmm [num] -n "
- "[num1] [num2] -o [prefix]"
+ "[pheno cols...] -o [prefix]"
<< endl;
cout << " ./gemma -g [filename] -p [filename] -a [filename] -k "
- "[filename] -lmm [num] -n [num1] [num2] -o [prefix]"
+ "[filename] -lmm [num] -n [pheno cols...] -o [prefix]"
<< endl;
cout << " to fit a Bayesian sparse linear mixed model: " << endl;
cout << " ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
@@ -536,6 +545,7 @@ void GEMMA::PrintHelp(size_t option) {
if (option == 9) {
cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
+ cout << " -n [pheno cols...] - range of phenotypes" << endl;
cout << " -pnr "
<< " specify the pvalue threshold to use the Newton-Raphson's method "
"(default 0.001)"
@@ -694,6 +704,7 @@ void GEMMA::PrintHelp(size_t option) {
if (option == 14) {
cout << " DEBUG OPTIONS" << endl;
+ cout << " -no-checks disable checks" << endl;
cout << " -silence silent terminal display" << endl;
cout << " -debug debug output" << endl;
cout << " -nind [num] read up to num individuals" << endl;
@@ -1578,6 +1589,8 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
cPar.window_ns = atoi(str.c_str());
} else if (strcmp(argv[i], "-debug") == 0) {
cPar.mode_debug = true;
+ } else if (strcmp(argv[i], "-no-checks") == 0) {
+ cPar.mode_check = false;
} else {
cout << "error! unrecognized option: " << argv[i] << endl;
cPar.error = true;
@@ -1713,6 +1726,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
cout << "error! fail to read kinship/relatedness file. " << endl;
return;
}
+ // FIXME: this is strange, why read twice?
ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
cPar.error, G_full);
if (cPar.error == true) {
@@ -1723,6 +1737,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
CenterMatrix(G_full);
+ validate_K(G,cPar.mode_check);
// eigen-decomposition and calculate trace_G
cout << "Start Eigen-Decomposition..." << endl;
@@ -1869,6 +1884,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
return;
}
+ // Now we have the Kinship matrix test it
+ validate_K(G,cPar.mode_check);
+
if (cPar.a_mode == 21) {
cPar.WriteMatrix(G, "cXX");
} else {
@@ -2146,6 +2164,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
cPar.v_enrich, cPar.v_se_enrich);
+ assert(!has_nan(cPar.v_se_pve));
+
// if LDSC weights, then compute the weights and run the above steps again
if (cPar.a_mode == 62) {
// compute the weights and normalize the weights for A
@@ -2175,8 +2195,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
cPar.v_enrich, cPar.v_se_enrich);
+ assert(!has_nan(cPar.v_se_pve));
}
+
gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
cPar.WriteMatrix(S, "S");
@@ -2265,6 +2287,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
cPar.v_se_enrich);
+ assert(!has_nan(cPar.v_se_pve));
gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
gsl_vector_memcpy(&s_sub.vector, s_ref);
@@ -2325,6 +2348,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
+ validate_K(G,cPar.mode_check);
(cPar.v_traceG).clear();
double d = 0;
@@ -2636,6 +2660,7 @@ return;}
CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+ assert(!has_nan(cPar.v_se_pve));
// write files
// cPar.WriteMatrix (XWz, "XWz");
@@ -2692,6 +2717,7 @@ return;}
// center matrix G
CenterMatrix(G);
+ validate_K(G,cPar.mode_check);
// is residual weights are provided, then
if (!cPar.file_weight.empty()) {
@@ -3007,6 +3033,7 @@ return;}
// center matrix G
CenterMatrix(G);
+ validate_K(G,cPar.mode_check);
} else {
cPar.ReadGenotypes(UtX, G, true);
}
@@ -3123,6 +3150,8 @@ return;}
// center matrix G
CenterMatrix(G);
+ validate_K(G,cPar.mode_check);
+
} else {
cPar.ReadGenotypes(UtX, G, true);
}
@@ -3329,6 +3358,7 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
outfile << " " << cPar.v_se_pve[i];
}
outfile << endl;
+ assert(!has_nan(cPar.v_se_pve));
if (cPar.n_vc > 1) {
outfile << "## total pve = " << cPar.pve_total << endl;
diff --git a/src/io.cpp b/src/io.cpp
index 8f3b58f..fedf69a 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -39,6 +39,7 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
+#include "debug.h"
#include "eigenlib.h"
#include "gzstream.h"
#include "io.h"
@@ -1138,7 +1139,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
if (j_total == ni_total) {
cout << "error! number of columns in the "
<< "kinship file is larger than the number"
- << " of phentypes for row = " << i_total << endl;
+ << " of phenotypes for row = " << i_total << endl;
error = true;
}
diff --git a/src/main.cpp b/src/main.cpp
index e37b154..92c4d90 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -25,14 +25,6 @@
using namespace std;
-void gemma_gsl_error_handler (const char * reason,
- const char * file,
- int line, int gsl_errno) {
- cerr << "GSL ERROR: " << reason << " in " << file
- << " at line " << line << " errno " << gsl_errno <<endl;
- exit(22);
-}
-
int main(int argc, char *argv[]) {
GEMMA cGemma;
PARAM cPar;
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 9a4bb8b..3b70102 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -29,15 +29,25 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
+#include <tuple>
#include <vector>
#include "Eigen/Dense"
+
+#include "gsl/gsl_version.h"
+
+#if GSL_MAJOR_VERSION < 2
+#pragma message "GSL version " GSL_VERSION
+#endif
+
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_linalg.h"
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
+#include "gsl/gsl_eigen.h"
+#include "debug.h"
#include "eigenlib.h"
#include "lapack.h"
#include "mathfunc.h"
@@ -45,6 +55,14 @@
using namespace std;
using namespace Eigen;
+bool has_nan(const vector<double> v) {
+ for (const auto& e: v) {
+ if (std::isnan(e))
+ return true;
+ }
+ return false;
+}
+
// calculate variance of a vector
double VectorVar(const gsl_vector *v) {
double d, m = 0.0, m2 = 0.0;
@@ -187,6 +205,111 @@ double ScaleMatrix(gsl_matrix *G) {
return d;
}
+bool isMatrixSymmetric(const gsl_matrix *G) {
+ enforce(G->size1 == G->size2);
+ auto m = G->data;
+ // upper triangle
+ auto size = G->size1;
+ for(auto c = 0; c < size; c++) {
+ for(auto r = 0; r < c; r++) {
+ int x1 = c, y1 = r, x2 = r, y2 = c;
+ auto idx1 = y1*size+x1, idx2 = y2*size+x2;
+ // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
+ if(m[idx1] != m[idx2]) {
+ cout << "Mismatch coordinates (" << c << "," << r << ")" << m[idx1] << ":" << m[idx2] << "!" << endl;
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+bool isMatrixPositiveDefinite(const gsl_matrix *G) {
+ enforce(G->size1 == G->size2);
+ auto G2 = gsl_matrix_alloc(G->size1, G->size2);
+ enforce_gsl(gsl_matrix_memcpy(G2,G));
+ auto handler = gsl_set_error_handler_off();
+#if GSL_MAJOR_VERSION >= 2
+ auto s = gsl_linalg_cholesky_decomp1(G2);
+#else
+ auto s = gsl_linalg_cholesky_decomp(G2);
+#endif
+ gsl_set_error_handler(handler);
+ gsl_matrix_free(G2);
+ return (s == GSL_SUCCESS);
+}
+
+gsl_vector *getEigenValues(const gsl_matrix *G) {
+ enforce(G->size1 == G->size2);
+ auto G2 = gsl_matrix_alloc(G->size1, G->size2);
+ enforce_gsl(gsl_matrix_memcpy(G2,G));
+ auto eworkspace = gsl_eigen_symm_alloc(G->size1);
+ enforce(eworkspace);
+ gsl_vector *eigenvalues = gsl_vector_alloc(G->size1);
+ enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
+ gsl_eigen_symm_free(eworkspace);
+ gsl_matrix_free(G2);
+ return eigenvalues;
+}
+
+// Check whether eigen values are larger than *min*
+// by default 1E-5.
+// Returns success, eigen min, eigen max
+
+tuple<double, double> abs_minmax(const gsl_vector *v) {
+ auto min = std::abs(v->data[0]);
+ auto max = std::abs(v->data[0]);
+ for (auto i=0; i<v->size; i++) {
+ auto value = std::abs(v->data[i]);
+ if (value < min)
+ min = value;
+ if (value > max)
+ max = value;
+ }
+ return std::make_tuple(min, max);
+}
+
+bool has_negative_values(const gsl_vector *v) {
+ for (auto i=0; i<v->size; i++) {
+ if (v->data[i] < 0.0) {
+ return true;
+ }
+ }
+ return false;
+}
+
+bool checkMatrixEigen(const gsl_matrix *G, double min) {
+ auto eigenvalues = getEigenValues(G);
+ bool ret_valid = true;
+
+ if (has_negative_values(eigenvalues))
+ ret_valid = false;
+
+ auto t = abs_minmax(eigenvalues);
+ auto absmin = get<0>(t);
+ if (absmin < min)
+ ret_valid = false;
+ gsl_vector_free(eigenvalues);
+ return ret_valid;
+}
+
+bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio) {
+ bool ret_valid = true;
+ auto eigenvalues = getEigenValues(G);
+
+ auto t = abs_minmax(eigenvalues);
+ auto absmin = get<0>(t);
+ auto absmax = get<1>(t);
+ if (absmax/absmin > max_ratio) {
+ #if !NDEBUG
+ cerr << "**** DEBUG: Eigenvalues Min " << absmin << " Max " << absmax << " Ratio " << absmax/absmin << endl;
+ #endif
+ ret_valid = false;
+ }
+ gsl_vector_free(eigenvalues);
+ return ret_valid;
+}
+
double SumVector(const gsl_vector *v) {
double sum = 0;
for (int i = 0; i < v->size; i++ ) {
diff --git a/src/mathfunc.h b/src/mathfunc.h
index b9f3c73..8a2ea64 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -26,12 +26,18 @@
using namespace std;
using namespace Eigen;
+bool has_nan(const vector<double> v);
+
double VectorVar(const gsl_vector *v);
void CenterMatrix(gsl_matrix *G);
void CenterMatrix(gsl_matrix *G, const gsl_vector *w);
void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
void StandardizeMatrix(gsl_matrix *G);
double ScaleMatrix(gsl_matrix *G);
+bool isMatrixPositiveDefinite(const gsl_matrix *G);
+bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio=4.0);
+bool isMatrixSymmetric(const gsl_matrix *G);
+bool checkMatrixEigen(const gsl_matrix *G, double min=1e-5);
double SumVector(const gsl_vector *v);
double CenterVector(gsl_vector *y);
void CenterVector(gsl_vector *y, const gsl_matrix *W);
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index eb591ca..358038f 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -305,7 +305,7 @@ double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
d1 = gsl_matrix_get(X, i, k);
d2 = gsl_matrix_get(X, j, k);
delta = gsl_vector_get(eval, k);
- d += d1 * d2 / (dl * delta + 1.0);
+ d += d1 * d2 / (dl * delta + 1.0); // @@
}
}
@@ -366,7 +366,7 @@ void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l,
for (size_t i = 0; i < d_size; i++) {
dl = gsl_vector_get(D_l, i);
- d_u = dl / (delta * dl + 1.0);
+ d_u = dl / (delta * dl + 1.0); // @@
d_e = delta * d_u;
gsl_matrix_set(OmegaU, i, k, d_u);
@@ -961,7 +961,7 @@ void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X,
d = delta * dl + 1.0;
gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i);
- gsl_vector_scale(&mat_row.vector, 1.0 / d);
+ gsl_vector_scale(&mat_row.vector, 1.0 / d); // @@
logdet_H += log(d);
}
diff --git a/src/param.h b/src/param.h
index f4d649f..249bf02 100644
--- a/src/param.h
+++ b/src/param.h
@@ -111,10 +111,12 @@ public:
class PARAM {
public:
- // IO-related parameters.
+ // IO-related parameters
+ bool mode_check = true;
bool mode_silence;
bool mode_debug = false;
uint issue; // enable tests for issue on github tracker
+
int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
vector<size_t> p_column; // Which phenotype column needs analysis.