diff options
Diffstat (limited to 'src/mathfunc.cpp')
-rw-r--r-- | src/mathfunc.cpp | 216 |
1 files changed, 139 insertions, 77 deletions
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp index 4203837..9076c47 100644 --- a/src/mathfunc.cpp +++ b/src/mathfunc.cpp @@ -1,19 +1,21 @@ /* - Genome-wide Efficient Mixed Model Association (GEMMA) - Copyright (C) 2011-2017, Xiang Zhou - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. + Genome-wide Efficient Mixed Model Association (GEMMA) + Copyright © 2011-2017, Xiang Zhou + Copyright © 2017, Peter Carbonetto + Copyright © 2017, Pjotr Prins + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <bitset> @@ -32,7 +34,7 @@ #include <tuple> #include <vector> -#include "Eigen/Dense" +// #include "Eigen/Dense" #include "gsl/gsl_version.h" @@ -40,6 +42,7 @@ #pragma message "GSL version " GSL_VERSION #endif +#include "gsl/gsl_sys.h" // for gsl_isnan, gsl_isinf, gsl_isfinite #include "gsl/gsl_blas.h" #include "gsl/gsl_cdf.h" #include "gsl/gsl_linalg.h" @@ -48,21 +51,49 @@ #include "gsl/gsl_eigen.h" #include "debug.h" -#include "eigenlib.h" +// #include "eigenlib.h" +#include "fastblas.h" #include "lapack.h" #include "mathfunc.h" using namespace std; -using namespace Eigen; +// using namespace Eigen; bool has_nan(const vector<double> v) { for (const auto& e: v) { - if (std::isnan(e)) + if (is_nan(e)) return true; } return false; } +bool has_nan(const gsl_vector *v) { + for (size_t i = 0; i < v->size; ++i) + if (gsl_isnan(gsl_vector_get(v,i))) return true; + return false; +} +bool has_inf(const gsl_vector *v) { + for (size_t i = 0; i < v->size; ++i) { + auto value = gsl_vector_get(v,i); + if (gsl_isinf(value) != 0) return true; + } + return false; +} +bool has_nan(const gsl_matrix *m) { + for (size_t i = 0; i < m->size1; ++i) + for (size_t j = 0; j < m->size2; ++j) + if (gsl_isnan(gsl_matrix_get(m,i,j))) return true; + return false; +} +bool has_inf(const gsl_matrix *m) { + for (size_t i = 0; i < m->size1; ++i) + for (size_t j = 0; j < m->size2; ++j) { + auto value = gsl_matrix_get(m,i,j); + if (gsl_isinf(value) != 0) return true; + } + return false; +} + // calculate variance of a vector double VectorVar(const gsl_vector *v) { double d, m = 0.0, m2 = 0.0; @@ -79,8 +110,8 @@ double VectorVar(const gsl_vector *v) { // Center the matrix G. void CenterMatrix(gsl_matrix *G) { double d; - gsl_vector *w = gsl_vector_alloc(G->size1); - gsl_vector *Gw = gsl_vector_alloc(G->size1); + gsl_vector *w = gsl_vector_safe_alloc(G->size1); + gsl_vector *Gw = gsl_vector_safe_alloc(G->size1); gsl_vector_set_all(w, 1.0); gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw); @@ -95,8 +126,8 @@ void CenterMatrix(gsl_matrix *G) { } } - gsl_vector_free(w); - gsl_vector_free(Gw); + gsl_vector_safe_free(w); + gsl_vector_safe_free(Gw); return; } @@ -104,7 +135,7 @@ void CenterMatrix(gsl_matrix *G) { // Center the matrix G. void CenterMatrix(gsl_matrix *G, const gsl_vector *w) { double d, wtw; - gsl_vector *Gw = gsl_vector_alloc(G->size1); + gsl_vector *Gw = gsl_vector_safe_alloc(G->size1); gsl_blas_ddot(w, w, &wtw); gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw); @@ -119,19 +150,19 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w) { } } - gsl_vector_free(Gw); + gsl_vector_safe_free(Gw); return; } // Center the matrix G. void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) { - gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); - gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2); - gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1); - gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2); - gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2); - gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1); + gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2); + gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2); + gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1); + gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2); + gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2); + gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1); gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); @@ -155,12 +186,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) { gsl_matrix_add(G, Gtmp); - gsl_matrix_free(WtW); - gsl_matrix_free(WtWi); - gsl_matrix_free(WtWiWt); - gsl_matrix_free(GW); - gsl_matrix_free(WtGW); - gsl_matrix_free(Gtmp); + gsl_matrix_safe_free(WtW); + gsl_matrix_safe_free(WtWi); + gsl_matrix_safe_free(WtWiWt); + gsl_matrix_safe_free(GW); + gsl_matrix_safe_free(WtGW); + gsl_matrix_safe_free(Gtmp); return; } @@ -210,8 +241,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) { auto m = G->data; // upper triangle auto size = G->size1; - for(auto c = 0; c < size; c++) { - for(auto r = 0; r < c; r++) { + for(size_t c = 0; c < size; c++) { + for(size_t r = 0; r < c; r++) { int x1 = c, y1 = r, x2 = r, y2 = c; auto idx1 = y1*size+x1, idx2 = y2*size+x2; // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]); @@ -226,8 +257,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) { bool isMatrixPositiveDefinite(const gsl_matrix *G) { enforce(G->size1 == G->size2); - auto G2 = gsl_matrix_alloc(G->size1, G->size2); - enforce_gsl(gsl_matrix_memcpy(G2,G)); + auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2); + enforce_gsl(gsl_matrix_safe_memcpy(G2,G)); auto handler = gsl_set_error_handler_off(); #if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3 auto s = gsl_linalg_cholesky_decomp1(G2); @@ -235,20 +266,24 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G) { auto s = gsl_linalg_cholesky_decomp(G2); #endif gsl_set_error_handler(handler); + if (s == GSL_SUCCESS) { + gsl_matrix_safe_free(G2); + return true; + } gsl_matrix_free(G2); - return (s == GSL_SUCCESS); + return (false); } gsl_vector *getEigenValues(const gsl_matrix *G) { enforce(G->size1 == G->size2); - auto G2 = gsl_matrix_alloc(G->size1, G->size2); - enforce_gsl(gsl_matrix_memcpy(G2,G)); + auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2); + enforce_gsl(gsl_matrix_safe_memcpy(G2,G)); auto eworkspace = gsl_eigen_symm_alloc(G->size1); enforce(eworkspace); - gsl_vector *eigenvalues = gsl_vector_alloc(G->size1); + gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1); enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace)); gsl_eigen_symm_free(eworkspace); - gsl_matrix_free(G2); + gsl_matrix_safe_free(G2); return eigenvalues; } @@ -256,11 +291,27 @@ gsl_vector *getEigenValues(const gsl_matrix *G) { // by default 1E-5. // Returns success, eigen min, eigen min-but-1, eigen max +tuple<double, double, double> minmax(const gsl_vector *v) { + auto min = v->data[0]; + auto min1 = min; + auto max = min; + for (size_t i=1; i<v->size; i++) { + auto value = std::abs(v->data[i]); + if (value < min) { + min1 = min; + min = value; + } + if (value > max) + max = value; + } + return std::make_tuple(min, min1, max); +} + tuple<double, double, double> abs_minmax(const gsl_vector *v) { auto min = std::abs(v->data[0]); - auto min1 = std::abs(v->data[0]); - auto max = std::abs(v->data[0]); - for (auto i=0; i<v->size; i++) { + auto min1 = min; + auto max = min; + for (size_t i=1; i<v->size; i++) { auto value = std::abs(v->data[i]); if (value < min) { min1 = min; @@ -276,7 +327,7 @@ tuple<double, double, double> abs_minmax(const gsl_vector *v) { // the lowest value bool has_negative_values_but_one(const gsl_vector *v) { bool one_skipped = false; - for (auto i=0; i<v->size; i++) { + for (size_t i=0; i<v->size; i++) { if (v->data[i] < 0.0) { if (one_skipped) return true; @@ -286,11 +337,12 @@ bool has_negative_values_but_one(const gsl_vector *v) { return false; } -uint count_small_values(const gsl_vector *v, double min) { +uint count_abs_small_values(const gsl_vector *v, double min) { uint count = 0; - for (auto i=0; i<v->size; i++) { - if (v->data[i] < min) + for (size_t i=0; i<v->size; i++) { + if (std::abs(v->data[i]) < min) { count += 1; + } } return count; } @@ -298,24 +350,35 @@ uint count_small_values(const gsl_vector *v, double min) { // Check for matrix being ill conditioned by taking the eigen values // and the ratio of max and min but one (min is expected to be zero). bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) { - bool ret_valid = true; - auto t = abs_minmax(eigenvalues); auto absmin = get<0>(t); auto absmin1 = get<1>(t); auto absmax = get<2>(t); if (absmax/absmin1 > max_ratio) { #if !NDEBUG - cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl; + cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl; + auto t = minmax(eigenvalues); + auto min = get<0>(t); + auto min1 = get<1>(t); + auto max = get<2>(t); + cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl; + cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl; #endif - ret_valid = false; + return true; } - return ret_valid; + return false; +} + +double sum(const double *m, size_t rows, size_t cols) { + double sum = 0.0; + for (size_t i = 0; i<rows*cols; i++) + sum += m[i]; + return sum; } double SumVector(const gsl_vector *v) { double sum = 0; - for (int i = 0; i < v->size; i++ ) { + for (size_t i = 0; i < v->size; i++ ) { sum += gsl_vector_get(v, i); } return( sum ); @@ -337,9 +400,9 @@ double CenterVector(gsl_vector *y) { // Center the vector y. void CenterVector(gsl_vector *y, const gsl_matrix *W) { - gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2); - gsl_vector *Wty = gsl_vector_alloc(W->size2); - gsl_vector *WtWiWty = gsl_vector_alloc(W->size2); + gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2); + gsl_vector *Wty = gsl_vector_safe_alloc(W->size2); + gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2); gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW); gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty); @@ -351,9 +414,9 @@ void CenterVector(gsl_vector *y, const gsl_matrix *W) { gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y); - gsl_matrix_free(WtW); - gsl_vector_free(Wty); - gsl_vector_free(WtWiWty); + gsl_matrix_safe_free(WtW); + gsl_vector_safe_free(Wty); + gsl_vector_safe_free(WtWiWty); return; } @@ -379,22 +442,18 @@ void StandardizeVector(gsl_vector *y) { // Calculate UtX. void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) { - gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2); - gsl_matrix_memcpy(X, UtX); - eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX); - gsl_matrix_free(X); - - return; + gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2); + gsl_matrix_safe_memcpy(X, UtX); + fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX); + gsl_matrix_safe_free(X); } void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) { - eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX); - return; + fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX); } void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) { gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx); - return; } // Kronecker product. @@ -403,7 +462,7 @@ void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) { for (size_t j = 0; j < K->size2; j++) { gsl_matrix_view H_sub = gsl_matrix_submatrix( H, i * V->size1, j * V->size2, V->size1, V->size2); - gsl_matrix_memcpy(&H_sub.matrix, V); + gsl_matrix_safe_memcpy(&H_sub.matrix, V); gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j)); } } @@ -416,13 +475,13 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) { for (size_t j = i; j < K->size2; j++) { gsl_matrix_view H_sub = gsl_matrix_submatrix( H, i * V->size1, j * V->size2, V->size1, V->size2); - gsl_matrix_memcpy(&H_sub.matrix, V); + gsl_matrix_safe_memcpy(&H_sub.matrix, V); gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j)); if (i != j) { gsl_matrix_view H_sub_sym = gsl_matrix_submatrix( H, j * V->size1, i * V->size2, V->size1, V->size2); - gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix); + gsl_matrix_safe_memcpy(&H_sub_sym.matrix, &H_sub.matrix); } } } @@ -520,6 +579,7 @@ unsigned char Double02ToUchar(const double dosage) { return (int)(dosage * 100); } +/* void uchar_matrix_get_row(const vector<vector<unsigned char>> &X, const size_t i_row, VectorXd &x_row) { if (i_row < X.size()) { @@ -531,3 +591,5 @@ void uchar_matrix_get_row(const vector<vector<unsigned char>> &X, exit(1); } } + +*/ |