about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--src/debug.cpp30
-rw-r--r--src/debug.h34
-rw-r--r--src/gemma.cpp34
-rw-r--r--src/io.cpp3
-rw-r--r--src/main.cpp8
-rw-r--r--src/mathfunc.cpp123
-rw-r--r--src/mathfunc.h6
-rw-r--r--src/mvlmm.cpp6
-rw-r--r--src/param.h4
9 files changed, 227 insertions, 21 deletions
diff --git a/src/debug.cpp b/src/debug.cpp
new file mode 100644
index 0000000..4e37538
--- /dev/null
+++ b/src/debug.cpp
@@ -0,0 +1,30 @@
+
+#include <cmath>
+#include <cstring>
+#include <ctime>
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <sys/stat.h>
+#include <vector>
+
+#include "gsl/gsl_blas.h"
+#include "gsl/gsl_cdf.h"
+#include "gsl/gsl_eigen.h"
+#include "gsl/gsl_linalg.h"
+#include "gsl/gsl_matrix.h"
+#include "gsl/gsl_vector.h"
+
+#include "debug.h"
+#include "mathfunc.h"
+
+// Helper function called by macro validate_K(K, check)
+void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line) {
+  if (do_check) {
+    debug_msg("Validating K");
+    if (!checkMatrixEigen(K)) warning_at_msg(__file,__line,"K has small or negative eigenvalues!");
+    if (!isMatrixIllConditioned(K)) warning_at_msg(__file,__line,"K is ill conditioned!")    if (!isMatrixSymmetric(K)) fail_at_msg(__file,__line,"K is not symmetric!" );
+    if (!isMatrixPositiveDefinite(K)) fail_at_msg(__file,__line,"K is not positive definite!");
+;
+  }
+}
diff --git a/src/debug.h b/src/debug.h
index 3fbe9e0..82dd245 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -4,26 +4,49 @@
 #include <assert.h>
 #include <iostream>
 
-// enforce works like assert but also when NDEBUG is set (i.e., it
-// always works). enforce_msg prints message instead of expr
+#include "gsl/gsl_matrix.h"
+
+void gemma_gsl_error_handler (const char * reason,
+                              const char * file,
+                              int line, int gsl_errno);
+
+
+// Validation routines
+void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line);
 
 #define ROUND(f) round(f * 10000.)/10000
+#define validate_K(K,check) do_validate_K(K,check,__FILE__,__LINE__)
+
+#define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl;
+
+inline void fail_at_msg(const char *__file, int __line, const char *msg) {
+  std::cerr << "**** FAIL: " << msg << " in " << __file << " at line " << __line << std::endl;
+  exit(1);
+}
 #if defined NDEBUG
+
+#define warning_msg(msg) cerr << "**** WARNING: " << msg << endl;
 #define debug_msg(msg)
 #define assert_issue(is_issue, expr)
-#else
-#define debug_msg(msg) cout << "**** DEBUG: " << msg << endl;
+
+#else // DEBUG
+
+#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl;
+#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __PRETTY_FUNCTION__ << endl;
 #define assert_issue(is_issue, expr) \
   ((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0))
 
 #endif
 
+// enforce works like assert but also when NDEBUG is set (i.e., it
+// always works). enforce_msg prints message instead of expr
+
 /* This prints an "Assertion failed" message and aborts.  */
 inline void __enforce_fail(const char *__assertion, const char *__file,
                     unsigned int __line,
                     const char *__function)
 {
-  std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __function << std::endl;
+  std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __PRETTY_FUNCTION__ << std::endl;
   exit(1);
 }
 
@@ -54,5 +77,4 @@ inline void __enforce_fail(const char *__assertion, const char *__file,
        : __enforce_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__,         \
                         __LINE__, __ASSERT_FUNCTION))
 
-
 #endif
diff --git a/src/gemma.cpp b/src/gemma.cpp
index c82bf4a..7749bc9 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -44,11 +44,20 @@
 #include "prdt.h"
 #include "varcov.h"
 #include "vc.h"
+#include "debug.h"
 
 using namespace std;
 
 GEMMA::GEMMA(void) : version("0.97.1"), date("08/07/2017"), year("2017") {}
 
+void gemma_gsl_error_handler (const char * reason,
+                              const char * file,
+                              int line, int gsl_errno) {
+  cerr << "GSL ERROR: " << reason << " in " << file
+       << " at line " << line << " errno " << gsl_errno <<endl;
+  exit(22);
+}
+
 void GEMMA::PrintHeader(void) {
   cout << endl;
   cout << "*********************************************************" << endl;
@@ -247,10 +256,10 @@ void GEMMA::PrintHelp(size_t option) {
          << endl;
     cout << " to fit a multivariate linear mixed model: " << endl;
     cout << "         ./gemma -bfile [prefix] -k [filename] -lmm [num] -n "
-            "[num1] [num2] -o [prefix]"
+            "[pheno cols...] -o [prefix]"
          << endl;
     cout << "         ./gemma -g [filename] -p [filename] -a [filename] -k "
-            "[filename] -lmm [num] -n [num1] [num2] -o [prefix]"
+            "[filename] -lmm [num] -n [pheno cols...] -o [prefix]"
          << endl;
     cout << " to fit a Bayesian sparse linear mixed model: " << endl;
     cout << "         ./gemma -bfile [prefix] -bslmm [num] -o [prefix]" << endl;
@@ -536,6 +545,7 @@ void GEMMA::PrintHelp(size_t option) {
 
   if (option == 9) {
     cout << " MULTIVARIATE LINEAR MIXED MODEL OPTIONS" << endl;
+    cout << " -n [pheno cols...] - range of phenotypes" << endl;
     cout << " -pnr				     "
          << " specify the pvalue threshold to use the Newton-Raphson's method "
             "(default 0.001)"
@@ -694,6 +704,7 @@ void GEMMA::PrintHelp(size_t option) {
 
   if (option == 14) {
     cout << " DEBUG OPTIONS" << endl;
+    cout << " -no-checks               disable checks" << endl;
     cout << " -silence                 silent terminal display" << endl;
     cout << " -debug                   debug output" << endl;
     cout << " -nind       [num]        read up to num individuals" << endl;
@@ -1578,6 +1589,8 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       cPar.window_ns = atoi(str.c_str());
     } else if (strcmp(argv[i], "-debug") == 0) {
       cPar.mode_debug = true;
+    } else if (strcmp(argv[i], "-no-checks") == 0) {
+      cPar.mode_check = false;
     } else {
       cout << "error! unrecognized option: " << argv[i] << endl;
       cPar.error = true;
@@ -1713,6 +1726,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cout << "error! fail to read kinship/relatedness file. " << endl;
       return;
     }
+    // FIXME: this is strange, why read twice?
     ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
                  cPar.error, G_full);
     if (cPar.error == true) {
@@ -1723,6 +1737,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     // center matrix G
     CenterMatrix(G);
     CenterMatrix(G_full);
+    validate_K(G,cPar.mode_check);
 
     // eigen-decomposition and calculate trace_G
     cout << "Start Eigen-Decomposition..." << endl;
@@ -1869,6 +1884,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
       return;
     }
 
+    // Now we have the Kinship matrix test it
+    validate_K(G,cPar.mode_check);
+
     if (cPar.a_mode == 21) {
       cPar.WriteMatrix(G, "cXX");
     } else {
@@ -2146,6 +2164,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
                cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
                cPar.v_enrich, cPar.v_se_enrich);
 
+      assert(!has_nan(cPar.v_se_pve));
+
       // if LDSC weights, then compute the weights and run the above steps again
       if (cPar.a_mode == 62) {
         // compute the weights and normalize the weights for A
@@ -2175,8 +2195,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
                  cPar.ni_study, cPar.v_pve, cPar.v_se_pve, cPar.pve_total,
                  cPar.se_pve_total, cPar.v_sigma2, cPar.v_se_sigma2,
                  cPar.v_enrich, cPar.v_se_enrich);
+        assert(!has_nan(cPar.v_se_pve));
       }
 
+
       gsl_vector_set(s, cPar.n_vc, cPar.ni_test);
 
       cPar.WriteMatrix(S, "S");
@@ -2265,6 +2287,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
                cPar.v_pve, cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total,
                cPar.v_sigma2, cPar.v_se_sigma2, cPar.v_enrich,
                cPar.v_se_enrich);
+      assert(!has_nan(cPar.v_se_pve));
 
       gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
       gsl_vector_memcpy(&s_sub.vector, s_ref);
@@ -2325,6 +2348,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         // center matrix G
         CenterMatrix(G);
+        validate_K(G,cPar.mode_check);
 
         (cPar.v_traceG).clear();
         double d = 0;
@@ -2636,6 +2660,7 @@ return;}
     CalcCIss(Xz, XWz, XtXWz, S, Svar, w, z, s_vec, vec_cat, cPar.v_pve,
              cPar.v_se_pve, cPar.pve_total, cPar.se_pve_total, cPar.v_sigma2,
              cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
+      assert(!has_nan(cPar.v_se_pve));
 
     // write files
     // cPar.WriteMatrix (XWz, "XWz");
@@ -2692,6 +2717,7 @@ return;}
 
       // center matrix G
       CenterMatrix(G);
+      validate_K(G,cPar.mode_check);
 
       // is residual weights are provided, then
       if (!cPar.file_weight.empty()) {
@@ -3007,6 +3033,7 @@ return;}
 
         // center matrix G
         CenterMatrix(G);
+        validate_K(G,cPar.mode_check);
       } else {
         cPar.ReadGenotypes(UtX, G, true);
       }
@@ -3123,6 +3150,8 @@ return;}
 
           // center matrix G
           CenterMatrix(G);
+          validate_K(G,cPar.mode_check);
+
         } else {
           cPar.ReadGenotypes(UtX, G, true);
         }
@@ -3329,6 +3358,7 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
       outfile << "  " << cPar.v_se_pve[i];
     }
     outfile << endl;
+    assert(!has_nan(cPar.v_se_pve));
 
     if (cPar.n_vc > 1) {
       outfile << "## total pve = " << cPar.pve_total << endl;
diff --git a/src/io.cpp b/src/io.cpp
index 8f3b58f..fedf69a 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -39,6 +39,7 @@
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
 
+#include "debug.h"
 #include "eigenlib.h"
 #include "gzstream.h"
 #include "io.h"
@@ -1138,7 +1139,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
         if (j_total == ni_total) {
           cout << "error! number of columns in the "
                << "kinship file is larger than the number"
-               << " of phentypes for row = " << i_total << endl;
+               << " of phenotypes for row = " << i_total << endl;
           error = true;
         }
 
diff --git a/src/main.cpp b/src/main.cpp
index e37b154..92c4d90 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -25,14 +25,6 @@
 
 using namespace std;
 
-void gemma_gsl_error_handler (const char * reason,
-                              const char * file,
-                              int line, int gsl_errno) {
-  cerr << "GSL ERROR: " << reason << " in " << file
-       << " at line " << line << " errno " << gsl_errno <<endl;
-  exit(22);
-}
-
 int main(int argc, char *argv[]) {
   GEMMA cGemma;
   PARAM cPar;
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 9a4bb8b..3b70102 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -29,15 +29,25 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string>
+#include <tuple>
 #include <vector>
 
 #include "Eigen/Dense"
+
+#include "gsl/gsl_version.h"
+
+#if GSL_MAJOR_VERSION < 2
+#pragma message "GSL version " GSL_VERSION
+#endif
+
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
 #include "gsl/gsl_linalg.h"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
+#include "gsl/gsl_eigen.h"
 
+#include "debug.h"
 #include "eigenlib.h"
 #include "lapack.h"
 #include "mathfunc.h"
@@ -45,6 +55,14 @@
 using namespace std;
 using namespace Eigen;
 
+bool has_nan(const vector<double> v) {
+  for (const auto& e: v) {
+    if (std::isnan(e))
+      return true;
+  }
+  return false;
+}
+
 // calculate variance of a vector
 double VectorVar(const gsl_vector *v) {
   double d, m = 0.0, m2 = 0.0;
@@ -187,6 +205,111 @@ double ScaleMatrix(gsl_matrix *G) {
   return d;
 }
 
+bool isMatrixSymmetric(const gsl_matrix *G) {
+  enforce(G->size1 == G->size2);
+  auto m = G->data;
+  // upper triangle
+  auto size = G->size1;
+  for(auto c = 0; c < size; c++) {
+    for(auto r = 0; r < c; r++) {
+      int x1 = c, y1 = r, x2 = r, y2 = c;
+      auto idx1 = y1*size+x1, idx2 = y2*size+x2;
+      // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
+      if(m[idx1] != m[idx2]) {
+        cout << "Mismatch coordinates (" << c << "," << r << ")" << m[idx1] << ":" << m[idx2] << "!" << endl;
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool isMatrixPositiveDefinite(const gsl_matrix *G) {
+  enforce(G->size1 == G->size2);
+  auto G2 = gsl_matrix_alloc(G->size1, G->size2);
+  enforce_gsl(gsl_matrix_memcpy(G2,G));
+  auto handler = gsl_set_error_handler_off();
+#if GSL_MAJOR_VERSION >= 2
+  auto s = gsl_linalg_cholesky_decomp1(G2);
+#else
+  auto s = gsl_linalg_cholesky_decomp(G2);
+#endif
+  gsl_set_error_handler(handler);
+  gsl_matrix_free(G2);
+  return (s == GSL_SUCCESS);
+}
+
+gsl_vector *getEigenValues(const gsl_matrix *G) {
+  enforce(G->size1 == G->size2);
+  auto G2 = gsl_matrix_alloc(G->size1, G->size2);
+  enforce_gsl(gsl_matrix_memcpy(G2,G));
+  auto eworkspace = gsl_eigen_symm_alloc(G->size1);
+  enforce(eworkspace);
+  gsl_vector *eigenvalues = gsl_vector_alloc(G->size1);
+  enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
+  gsl_eigen_symm_free(eworkspace);
+  gsl_matrix_free(G2);
+  return eigenvalues;
+}
+
+// Check whether eigen values are larger than *min*
+// by default 1E-5.
+// Returns success, eigen min, eigen max
+
+tuple<double, double> abs_minmax(const gsl_vector *v) {
+  auto min = std::abs(v->data[0]);
+  auto max = std::abs(v->data[0]);
+  for (auto i=0; i<v->size; i++) {
+    auto value = std::abs(v->data[i]);
+    if (value < min)
+      min = value;
+    if (value > max)
+      max = value;
+  }
+  return std::make_tuple(min, max);
+}
+
+bool has_negative_values(const gsl_vector *v) {
+  for (auto i=0; i<v->size; i++) {
+    if (v->data[i] < 0.0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool checkMatrixEigen(const gsl_matrix *G, double min) {
+  auto eigenvalues = getEigenValues(G);
+  bool ret_valid = true;
+
+  if (has_negative_values(eigenvalues))
+    ret_valid = false;
+
+  auto t = abs_minmax(eigenvalues);
+  auto absmin = get<0>(t);
+  if (absmin < min)
+    ret_valid = false;
+  gsl_vector_free(eigenvalues);
+  return ret_valid;
+}
+
+bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio) {
+  bool ret_valid = true;
+  auto eigenvalues = getEigenValues(G);
+
+  auto t = abs_minmax(eigenvalues);
+  auto absmin = get<0>(t);
+  auto absmax = get<1>(t);
+  if (absmax/absmin > max_ratio) {
+    #if !NDEBUG
+    cerr << "**** DEBUG: Eigenvalues Min " << absmin << " Max " << absmax << " Ratio " << absmax/absmin << endl;
+    #endif
+    ret_valid = false;
+  }
+  gsl_vector_free(eigenvalues);
+  return ret_valid;
+}
+
 double SumVector(const gsl_vector *v) {
   double sum = 0;
   for (int i = 0; i < v->size; i++ ) {
diff --git a/src/mathfunc.h b/src/mathfunc.h
index b9f3c73..8a2ea64 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -26,12 +26,18 @@
 using namespace std;
 using namespace Eigen;
 
+bool has_nan(const vector<double> v);
+
 double VectorVar(const gsl_vector *v);
 void CenterMatrix(gsl_matrix *G);
 void CenterMatrix(gsl_matrix *G, const gsl_vector *w);
 void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
 void StandardizeMatrix(gsl_matrix *G);
 double ScaleMatrix(gsl_matrix *G);
+bool isMatrixPositiveDefinite(const gsl_matrix *G);
+bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio=4.0);
+bool isMatrixSymmetric(const gsl_matrix *G);
+bool checkMatrixEigen(const gsl_matrix *G, double min=1e-5);
 double SumVector(const gsl_vector *v);
 double CenterVector(gsl_vector *y);
 void CenterVector(gsl_vector *y, const gsl_matrix *W);
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index eb591ca..358038f 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -305,7 +305,7 @@ double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
             d1 = gsl_matrix_get(X, i, k);
             d2 = gsl_matrix_get(X, j, k);
             delta = gsl_vector_get(eval, k);
-            d += d1 * d2 / (dl * delta + 1.0);
+            d += d1 * d2 / (dl * delta + 1.0); // @@
           }
         }
 
@@ -366,7 +366,7 @@ void CalcOmega(const gsl_vector *eval, const gsl_vector *D_l,
     for (size_t i = 0; i < d_size; i++) {
       dl = gsl_vector_get(D_l, i);
 
-      d_u = dl / (delta * dl + 1.0);
+      d_u = dl / (delta * dl + 1.0);  // @@
       d_e = delta * d_u;
 
       gsl_matrix_set(OmegaU, i, k, d_u);
@@ -961,7 +961,7 @@ void CalcHiQi(const gsl_vector *eval, const gsl_matrix *X,
       d = delta * dl + 1.0;
 
       gsl_vector_view mat_row = gsl_matrix_row(mat_dd, i);
-      gsl_vector_scale(&mat_row.vector, 1.0 / d);
+      gsl_vector_scale(&mat_row.vector, 1.0 / d); // @@
 
       logdet_H += log(d);
     }
diff --git a/src/param.h b/src/param.h
index f4d649f..249bf02 100644
--- a/src/param.h
+++ b/src/param.h
@@ -111,10 +111,12 @@ public:
 
 class PARAM {
 public:
-  // IO-related parameters.
+  // IO-related parameters
+  bool mode_check = true;
   bool mode_silence;
   bool mode_debug = false;
   uint issue; // enable tests for issue on github tracker
+
   int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
   int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
   vector<size_t> p_column; // Which phenotype column needs analysis.