1 files changed, 139 insertions, 77 deletions
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 4203837..9076c47 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -1,19 +1,21 @@
 /*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011-2017, Xiang Zhou
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include <bitset>
@@ -32,7 +34,7 @@
 #include <tuple>
 #include <vector>
 
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
 
 #include "gsl/gsl_version.h"
 
@@ -40,6 +42,7 @@
 #pragma message "GSL version " GSL_VERSION
 #endif
 
+#include "gsl/gsl_sys.h" // for gsl_isnan, gsl_isinf, gsl_isfinite
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
 #include "gsl/gsl_linalg.h"
@@ -48,21 +51,49 @@
 #include "gsl/gsl_eigen.h"
 
 #include "debug.h"
-#include "eigenlib.h"
+// #include "eigenlib.h"
+#include "fastblas.h"
 #include "lapack.h"
 #include "mathfunc.h"
 
 using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
 
 bool has_nan(const vector<double> v) {
   for (const auto& e: v) {
-    if (std::isnan(e))
+    if (is_nan(e))
       return true;
   }
   return false;
 }
 
+bool has_nan(const gsl_vector *v) {
+  for (size_t i = 0; i < v->size; ++i)
+    if (gsl_isnan(gsl_vector_get(v,i))) return true;
+  return false;
+}
+bool has_inf(const gsl_vector *v) {
+  for (size_t i = 0; i < v->size; ++i) {
+    auto value = gsl_vector_get(v,i);
+    if (gsl_isinf(value) != 0) return true;
+  }
+  return false;
+}
+bool has_nan(const gsl_matrix *m) {
+  for (size_t i = 0; i < m->size1; ++i)
+    for (size_t j = 0; j < m->size2; ++j)
+      if (gsl_isnan(gsl_matrix_get(m,i,j))) return true;
+  return false;
+}
+bool has_inf(const gsl_matrix *m) {
+  for (size_t i = 0; i < m->size1; ++i)
+    for (size_t j = 0; j < m->size2; ++j) {
+      auto value = gsl_matrix_get(m,i,j);
+      if (gsl_isinf(value) != 0) return true;
+    }
+  return false;
+}
+
 // calculate variance of a vector
 double VectorVar(const gsl_vector *v) {
   double d, m = 0.0, m2 = 0.0;
@@ -79,8 +110,8 @@ double VectorVar(const gsl_vector *v) {
 // Center the matrix G.
 void CenterMatrix(gsl_matrix *G) {
   double d;
-  gsl_vector *w = gsl_vector_alloc(G->size1);
-  gsl_vector *Gw = gsl_vector_alloc(G->size1);
+  gsl_vector *w = gsl_vector_safe_alloc(G->size1);
+  gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
   gsl_vector_set_all(w, 1.0);
 
   gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -95,8 +126,8 @@ void CenterMatrix(gsl_matrix *G) {
     }
   }
 
-  gsl_vector_free(w);
-  gsl_vector_free(Gw);
+  gsl_vector_safe_free(w);
+  gsl_vector_safe_free(Gw);
 
   return;
 }
@@ -104,7 +135,7 @@ void CenterMatrix(gsl_matrix *G) {
 // Center the matrix G.
 void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
   double d, wtw;
-  gsl_vector *Gw = gsl_vector_alloc(G->size1);
+  gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
 
   gsl_blas_ddot(w, w, &wtw);
   gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -119,19 +150,19 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
     }
   }
 
-  gsl_vector_free(Gw);
+  gsl_vector_safe_free(Gw);
 
   return;
 }
 
 // Center the matrix G.
 void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1);
-  gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2);
-  gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1);
+  gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2);
+  gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
 
@@ -155,12 +186,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
 
   gsl_matrix_add(G, Gtmp);
 
-  gsl_matrix_free(WtW);
-  gsl_matrix_free(WtWi);
-  gsl_matrix_free(WtWiWt);
-  gsl_matrix_free(GW);
-  gsl_matrix_free(WtGW);
-  gsl_matrix_free(Gtmp);
+  gsl_matrix_safe_free(WtW);
+  gsl_matrix_safe_free(WtWi);
+  gsl_matrix_safe_free(WtWiWt);
+  gsl_matrix_safe_free(GW);
+  gsl_matrix_safe_free(WtGW);
+  gsl_matrix_safe_free(Gtmp);
 
   return;
 }
@@ -210,8 +241,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
   auto m = G->data;
   // upper triangle
   auto size = G->size1;
-  for(auto c = 0; c < size; c++) {
-    for(auto r = 0; r < c; r++) {
+  for(size_t c = 0; c < size; c++) {
+    for(size_t r = 0; r < c; r++) {
       int x1 = c, y1 = r, x2 = r, y2 = c;
       auto idx1 = y1*size+x1, idx2 = y2*size+x2;
       // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
@@ -226,8 +257,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
 
 bool isMatrixPositiveDefinite(const gsl_matrix *G) {
   enforce(G->size1 == G->size2);
-  auto G2 = gsl_matrix_alloc(G->size1, G->size2);
-  enforce_gsl(gsl_matrix_memcpy(G2,G));
+  auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
+  enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
   auto handler = gsl_set_error_handler_off();
 #if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3
   auto s = gsl_linalg_cholesky_decomp1(G2);
@@ -235,20 +266,24 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G) {
   auto s = gsl_linalg_cholesky_decomp(G2);
 #endif
   gsl_set_error_handler(handler);
+  if (s == GSL_SUCCESS) {
+    gsl_matrix_safe_free(G2);
+    return true;
+  }
   gsl_matrix_free(G2);
-  return (s == GSL_SUCCESS);
+  return (false);
 }
 
 gsl_vector *getEigenValues(const gsl_matrix *G) {
   enforce(G->size1 == G->size2);
-  auto G2 = gsl_matrix_alloc(G->size1, G->size2);
-  enforce_gsl(gsl_matrix_memcpy(G2,G));
+  auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
+  enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
   auto eworkspace = gsl_eigen_symm_alloc(G->size1);
   enforce(eworkspace);
-  gsl_vector *eigenvalues = gsl_vector_alloc(G->size1);
+  gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1);
   enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
   gsl_eigen_symm_free(eworkspace);
-  gsl_matrix_free(G2);
+  gsl_matrix_safe_free(G2);
   return eigenvalues;
 }
 
@@ -256,11 +291,27 @@ gsl_vector *getEigenValues(const gsl_matrix *G) {
 // by default 1E-5.
 // Returns success, eigen min, eigen min-but-1, eigen max
 
+tuple<double, double, double> minmax(const gsl_vector *v) {
+  auto min  = v->data[0];
+  auto min1 = min;
+  auto max  = min;
+  for (size_t i=1; i<v->size; i++) {
+    auto value = std::abs(v->data[i]);
+    if (value < min) {
+      min1 = min;
+      min = value;
+    }
+    if (value > max)
+      max = value;
+  }
+  return std::make_tuple(min, min1, max);
+}
+
 tuple<double, double, double> abs_minmax(const gsl_vector *v) {
   auto min  = std::abs(v->data[0]);
-  auto min1 = std::abs(v->data[0]);
-  auto max  = std::abs(v->data[0]);
-  for (auto i=0; i<v->size; i++) {
+  auto min1 = min;
+  auto max  = min;
+  for (size_t i=1; i<v->size; i++) {
     auto value = std::abs(v->data[i]);
     if (value < min) {
       min1 = min;
@@ -276,7 +327,7 @@ tuple<double, double, double> abs_minmax(const gsl_vector *v) {
 // the lowest value
 bool has_negative_values_but_one(const gsl_vector *v) {
   bool one_skipped = false;
-  for (auto i=0; i<v->size; i++) {
+  for (size_t i=0; i<v->size; i++) {
     if (v->data[i] < 0.0) {
       if (one_skipped)
         return true;
@@ -286,11 +337,12 @@ bool has_negative_values_but_one(const gsl_vector *v) {
   return false;
 }
 
-uint count_small_values(const gsl_vector *v, double min) {
+uint count_abs_small_values(const gsl_vector *v, double min) {
   uint count = 0;
-  for (auto i=0; i<v->size; i++) {
-    if (v->data[i] < min)
+  for (size_t i=0; i<v->size; i++) {
+    if (std::abs(v->data[i]) < min) {
       count += 1;
+    }
   }
   return count;
 }
@@ -298,24 +350,35 @@ uint count_small_values(const gsl_vector *v, double min) {
 // Check for matrix being ill conditioned by taking the eigen values
 // and the ratio of max and min but one (min is expected to be zero).
 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
-  bool ret_valid = true;
-
   auto t = abs_minmax(eigenvalues);
   auto absmin = get<0>(t);
   auto absmin1 = get<1>(t);
   auto absmax = get<2>(t);
   if (absmax/absmin1 > max_ratio) {
     #if !NDEBUG
-    cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl;
+    cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl;
+    auto t = minmax(eigenvalues);
+    auto min = get<0>(t);
+    auto min1 = get<1>(t);
+    auto max = get<2>(t);
+    cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl;
+    cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl;
     #endif
-    ret_valid = false;
+    return true;
   }
-  return ret_valid;
+  return false;
+}
+
+double sum(const double *m, size_t rows, size_t cols) {
+  double sum = 0.0;
+  for (size_t i = 0; i<rows*cols; i++)
+    sum += m[i];
+  return sum;
 }
 
 double SumVector(const gsl_vector *v) {
   double sum = 0;
-  for (int i = 0; i < v->size; i++ ) {
+  for (size_t i = 0; i < v->size; i++ ) {
     sum += gsl_vector_get(v, i);
   }
   return( sum );
@@ -337,9 +400,9 @@ double CenterVector(gsl_vector *y) {
 
 // Center the vector y.
 void CenterVector(gsl_vector *y, const gsl_matrix *W) {
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *Wty = gsl_vector_alloc(W->size2);
-  gsl_vector *WtWiWty = gsl_vector_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_safe_alloc(W->size2);
+  gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
   gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
@@ -351,9 +414,9 @@ void CenterVector(gsl_vector *y, const gsl_matrix *W) {
 
   gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
 
-  gsl_matrix_free(WtW);
-  gsl_vector_free(Wty);
-  gsl_vector_free(WtWiWty);
+  gsl_matrix_safe_free(WtW);
+  gsl_vector_safe_free(Wty);
+  gsl_vector_safe_free(WtWiWty);
 
   return;
 }
@@ -379,22 +442,18 @@ void StandardizeVector(gsl_vector *y) {
 
 // Calculate UtX.
 void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
-  gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2);
-  gsl_matrix_memcpy(X, UtX);
-  eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
-  gsl_matrix_free(X);
-
-  return;
+  gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
+  gsl_matrix_safe_memcpy(X, UtX);
+  fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+  gsl_matrix_safe_free(X);
 }
 
 void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
-  eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
-  return;
+  fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
 }
 
 void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
   gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
-  return;
 }
 
 // Kronecker product.
@@ -403,7 +462,7 @@ void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
     for (size_t j = 0; j < K->size2; j++) {
       gsl_matrix_view H_sub = gsl_matrix_submatrix(
           H, i * V->size1, j * V->size2, V->size1, V->size2);
-      gsl_matrix_memcpy(&H_sub.matrix, V);
+      gsl_matrix_safe_memcpy(&H_sub.matrix, V);
       gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
     }
   }
@@ -416,13 +475,13 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
     for (size_t j = i; j < K->size2; j++) {
       gsl_matrix_view H_sub = gsl_matrix_submatrix(
           H, i * V->size1, j * V->size2, V->size1, V->size2);
-      gsl_matrix_memcpy(&H_sub.matrix, V);
+      gsl_matrix_safe_memcpy(&H_sub.matrix, V);
       gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
 
       if (i != j) {
         gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
             H, j * V->size1, i * V->size2, V->size1, V->size2);
-        gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
+        gsl_matrix_safe_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
       }
     }
   }
@@ -520,6 +579,7 @@ unsigned char Double02ToUchar(const double dosage) {
   return (int)(dosage * 100);
 }
 
+/*
 void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
                           const size_t i_row, VectorXd &x_row) {
   if (i_row < X.size()) {
@@ -531,3 +591,5 @@ void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
     exit(1);
   }
 }
+
+*/