aboutsummaryrefslogtreecommitdiff
path: root/src/mathfunc.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/mathfunc.cpp')
-rw-r--r--src/mathfunc.cpp216
1 files changed, 139 insertions, 77 deletions
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 4203837..9076c47 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -1,19 +1,21 @@
/*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011-2017, Xiang Zhou
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
+ Genome-wide Efficient Mixed Model Association (GEMMA)
+ Copyright © 2011-2017, Xiang Zhou
+ Copyright © 2017, Peter Carbonetto
+ Copyright © 2017, Pjotr Prins
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <bitset>
@@ -32,7 +34,7 @@
#include <tuple>
#include <vector>
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
#include "gsl/gsl_version.h"
@@ -40,6 +42,7 @@
#pragma message "GSL version " GSL_VERSION
#endif
+#include "gsl/gsl_sys.h" // for gsl_isnan, gsl_isinf, gsl_isfinite
#include "gsl/gsl_blas.h"
#include "gsl/gsl_cdf.h"
#include "gsl/gsl_linalg.h"
@@ -48,21 +51,49 @@
#include "gsl/gsl_eigen.h"
#include "debug.h"
-#include "eigenlib.h"
+// #include "eigenlib.h"
+#include "fastblas.h"
#include "lapack.h"
#include "mathfunc.h"
using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
bool has_nan(const vector<double> v) {
for (const auto& e: v) {
- if (std::isnan(e))
+ if (is_nan(e))
return true;
}
return false;
}
+bool has_nan(const gsl_vector *v) {
+ for (size_t i = 0; i < v->size; ++i)
+ if (gsl_isnan(gsl_vector_get(v,i))) return true;
+ return false;
+}
+bool has_inf(const gsl_vector *v) {
+ for (size_t i = 0; i < v->size; ++i) {
+ auto value = gsl_vector_get(v,i);
+ if (gsl_isinf(value) != 0) return true;
+ }
+ return false;
+}
+bool has_nan(const gsl_matrix *m) {
+ for (size_t i = 0; i < m->size1; ++i)
+ for (size_t j = 0; j < m->size2; ++j)
+ if (gsl_isnan(gsl_matrix_get(m,i,j))) return true;
+ return false;
+}
+bool has_inf(const gsl_matrix *m) {
+ for (size_t i = 0; i < m->size1; ++i)
+ for (size_t j = 0; j < m->size2; ++j) {
+ auto value = gsl_matrix_get(m,i,j);
+ if (gsl_isinf(value) != 0) return true;
+ }
+ return false;
+}
+
// calculate variance of a vector
double VectorVar(const gsl_vector *v) {
double d, m = 0.0, m2 = 0.0;
@@ -79,8 +110,8 @@ double VectorVar(const gsl_vector *v) {
// Center the matrix G.
void CenterMatrix(gsl_matrix *G) {
double d;
- gsl_vector *w = gsl_vector_alloc(G->size1);
- gsl_vector *Gw = gsl_vector_alloc(G->size1);
+ gsl_vector *w = gsl_vector_safe_alloc(G->size1);
+ gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
gsl_vector_set_all(w, 1.0);
gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -95,8 +126,8 @@ void CenterMatrix(gsl_matrix *G) {
}
}
- gsl_vector_free(w);
- gsl_vector_free(Gw);
+ gsl_vector_safe_free(w);
+ gsl_vector_safe_free(Gw);
return;
}
@@ -104,7 +135,7 @@ void CenterMatrix(gsl_matrix *G) {
// Center the matrix G.
void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
double d, wtw;
- gsl_vector *Gw = gsl_vector_alloc(G->size1);
+ gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
gsl_blas_ddot(w, w, &wtw);
gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -119,19 +150,19 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
}
}
- gsl_vector_free(Gw);
+ gsl_vector_safe_free(Gw);
return;
}
// Center the matrix G.
void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1);
- gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2);
- gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1);
+ gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2);
+ gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -155,12 +186,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
gsl_matrix_add(G, Gtmp);
- gsl_matrix_free(WtW);
- gsl_matrix_free(WtWi);
- gsl_matrix_free(WtWiWt);
- gsl_matrix_free(GW);
- gsl_matrix_free(WtGW);
- gsl_matrix_free(Gtmp);
+ gsl_matrix_safe_free(WtW);
+ gsl_matrix_safe_free(WtWi);
+ gsl_matrix_safe_free(WtWiWt);
+ gsl_matrix_safe_free(GW);
+ gsl_matrix_safe_free(WtGW);
+ gsl_matrix_safe_free(Gtmp);
return;
}
@@ -210,8 +241,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
auto m = G->data;
// upper triangle
auto size = G->size1;
- for(auto c = 0; c < size; c++) {
- for(auto r = 0; r < c; r++) {
+ for(size_t c = 0; c < size; c++) {
+ for(size_t r = 0; r < c; r++) {
int x1 = c, y1 = r, x2 = r, y2 = c;
auto idx1 = y1*size+x1, idx2 = y2*size+x2;
// printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
@@ -226,8 +257,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
bool isMatrixPositiveDefinite(const gsl_matrix *G) {
enforce(G->size1 == G->size2);
- auto G2 = gsl_matrix_alloc(G->size1, G->size2);
- enforce_gsl(gsl_matrix_memcpy(G2,G));
+ auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
+ enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
auto handler = gsl_set_error_handler_off();
#if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3
auto s = gsl_linalg_cholesky_decomp1(G2);
@@ -235,20 +266,24 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G) {
auto s = gsl_linalg_cholesky_decomp(G2);
#endif
gsl_set_error_handler(handler);
+ if (s == GSL_SUCCESS) {
+ gsl_matrix_safe_free(G2);
+ return true;
+ }
gsl_matrix_free(G2);
- return (s == GSL_SUCCESS);
+ return (false);
}
gsl_vector *getEigenValues(const gsl_matrix *G) {
enforce(G->size1 == G->size2);
- auto G2 = gsl_matrix_alloc(G->size1, G->size2);
- enforce_gsl(gsl_matrix_memcpy(G2,G));
+ auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
+ enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
auto eworkspace = gsl_eigen_symm_alloc(G->size1);
enforce(eworkspace);
- gsl_vector *eigenvalues = gsl_vector_alloc(G->size1);
+ gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1);
enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
gsl_eigen_symm_free(eworkspace);
- gsl_matrix_free(G2);
+ gsl_matrix_safe_free(G2);
return eigenvalues;
}
@@ -256,11 +291,27 @@ gsl_vector *getEigenValues(const gsl_matrix *G) {
// by default 1E-5.
// Returns success, eigen min, eigen min-but-1, eigen max
+tuple<double, double, double> minmax(const gsl_vector *v) {
+ auto min = v->data[0];
+ auto min1 = min;
+ auto max = min;
+ for (size_t i=1; i<v->size; i++) {
+ auto value = std::abs(v->data[i]);
+ if (value < min) {
+ min1 = min;
+ min = value;
+ }
+ if (value > max)
+ max = value;
+ }
+ return std::make_tuple(min, min1, max);
+}
+
tuple<double, double, double> abs_minmax(const gsl_vector *v) {
auto min = std::abs(v->data[0]);
- auto min1 = std::abs(v->data[0]);
- auto max = std::abs(v->data[0]);
- for (auto i=0; i<v->size; i++) {
+ auto min1 = min;
+ auto max = min;
+ for (size_t i=1; i<v->size; i++) {
auto value = std::abs(v->data[i]);
if (value < min) {
min1 = min;
@@ -276,7 +327,7 @@ tuple<double, double, double> abs_minmax(const gsl_vector *v) {
// the lowest value
bool has_negative_values_but_one(const gsl_vector *v) {
bool one_skipped = false;
- for (auto i=0; i<v->size; i++) {
+ for (size_t i=0; i<v->size; i++) {
if (v->data[i] < 0.0) {
if (one_skipped)
return true;
@@ -286,11 +337,12 @@ bool has_negative_values_but_one(const gsl_vector *v) {
return false;
}
-uint count_small_values(const gsl_vector *v, double min) {
+uint count_abs_small_values(const gsl_vector *v, double min) {
uint count = 0;
- for (auto i=0; i<v->size; i++) {
- if (v->data[i] < min)
+ for (size_t i=0; i<v->size; i++) {
+ if (std::abs(v->data[i]) < min) {
count += 1;
+ }
}
return count;
}
@@ -298,24 +350,35 @@ uint count_small_values(const gsl_vector *v, double min) {
// Check for matrix being ill conditioned by taking the eigen values
// and the ratio of max and min but one (min is expected to be zero).
bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
- bool ret_valid = true;
-
auto t = abs_minmax(eigenvalues);
auto absmin = get<0>(t);
auto absmin1 = get<1>(t);
auto absmax = get<2>(t);
if (absmax/absmin1 > max_ratio) {
#if !NDEBUG
- cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl;
+ cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl;
+ auto t = minmax(eigenvalues);
+ auto min = get<0>(t);
+ auto min1 = get<1>(t);
+ auto max = get<2>(t);
+ cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl;
+ cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl;
#endif
- ret_valid = false;
+ return true;
}
- return ret_valid;
+ return false;
+}
+
+double sum(const double *m, size_t rows, size_t cols) {
+ double sum = 0.0;
+ for (size_t i = 0; i<rows*cols; i++)
+ sum += m[i];
+ return sum;
}
double SumVector(const gsl_vector *v) {
double sum = 0;
- for (int i = 0; i < v->size; i++ ) {
+ for (size_t i = 0; i < v->size; i++ ) {
sum += gsl_vector_get(v, i);
}
return( sum );
@@ -337,9 +400,9 @@ double CenterVector(gsl_vector *y) {
// Center the vector y.
void CenterVector(gsl_vector *y, const gsl_matrix *W) {
- gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
- gsl_vector *Wty = gsl_vector_alloc(W->size2);
- gsl_vector *WtWiWty = gsl_vector_alloc(W->size2);
+ gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+ gsl_vector *Wty = gsl_vector_safe_alloc(W->size2);
+ gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2);
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
@@ -351,9 +414,9 @@ void CenterVector(gsl_vector *y, const gsl_matrix *W) {
gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
- gsl_matrix_free(WtW);
- gsl_vector_free(Wty);
- gsl_vector_free(WtWiWty);
+ gsl_matrix_safe_free(WtW);
+ gsl_vector_safe_free(Wty);
+ gsl_vector_safe_free(WtWiWty);
return;
}
@@ -379,22 +442,18 @@ void StandardizeVector(gsl_vector *y) {
// Calculate UtX.
void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
- gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2);
- gsl_matrix_memcpy(X, UtX);
- eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
- gsl_matrix_free(X);
-
- return;
+ gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
+ gsl_matrix_safe_memcpy(X, UtX);
+ fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+ gsl_matrix_safe_free(X);
}
void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
- eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
- return;
+ fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
}
void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
- return;
}
// Kronecker product.
@@ -403,7 +462,7 @@ void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
for (size_t j = 0; j < K->size2; j++) {
gsl_matrix_view H_sub = gsl_matrix_submatrix(
H, i * V->size1, j * V->size2, V->size1, V->size2);
- gsl_matrix_memcpy(&H_sub.matrix, V);
+ gsl_matrix_safe_memcpy(&H_sub.matrix, V);
gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
}
}
@@ -416,13 +475,13 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
for (size_t j = i; j < K->size2; j++) {
gsl_matrix_view H_sub = gsl_matrix_submatrix(
H, i * V->size1, j * V->size2, V->size1, V->size2);
- gsl_matrix_memcpy(&H_sub.matrix, V);
+ gsl_matrix_safe_memcpy(&H_sub.matrix, V);
gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
if (i != j) {
gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
H, j * V->size1, i * V->size2, V->size1, V->size2);
- gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
+ gsl_matrix_safe_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
}
}
}
@@ -520,6 +579,7 @@ unsigned char Double02ToUchar(const double dosage) {
return (int)(dosage * 100);
}
+/*
void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
const size_t i_row, VectorXd &x_row) {
if (i_row < X.size()) {
@@ -531,3 +591,5 @@ void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
exit(1);
}
}
+
+*/