aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/debug.cpp27
-rw-r--r--src/debug.h15
-rw-r--r--src/gemma.cpp66
-rw-r--r--src/lapack.cpp60
-rw-r--r--src/lapack.h2
-rw-r--r--src/mathfunc.cpp57
-rw-r--r--src/mathfunc.h9
-rw-r--r--src/mvlmm.cpp9
-rw-r--r--src/param.h3
-rw-r--r--test/src/unittests-math.cpp8
-rwxr-xr-xtest/test_suite.sh67
11 files changed, 185 insertions, 138 deletions
diff --git a/src/debug.cpp b/src/debug.cpp
index 4e37538..da0d06f 100644
--- a/src/debug.cpp
+++ b/src/debug.cpp
@@ -19,12 +19,27 @@
#include "mathfunc.h"
// Helper function called by macro validate_K(K, check)
-void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line) {
+void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line) {
if (do_check) {
- debug_msg("Validating K");
- if (!checkMatrixEigen(K)) warning_at_msg(__file,__line,"K has small or negative eigenvalues!");
- if (!isMatrixIllConditioned(K)) warning_at_msg(__file,__line,"K is ill conditioned!") if (!isMatrixSymmetric(K)) fail_at_msg(__file,__line,"K is not symmetric!" );
- if (!isMatrixPositiveDefinite(K)) fail_at_msg(__file,__line,"K is not positive definite!");
-;
+ // debug_msg("Validating K");
+ auto eigenvalues = getEigenValues(K);
+ uint count_small;
+ if (count_small = count_small_values(eigenvalues,EIGEN_MINVALUE)>1) {
+ std::string msg = "K has ";
+ msg += std::to_string(count_small);
+ msg += " eigenvalues close to zero";
+ warning_at_msg(__file,__line,msg);
+ }
+ if (!isMatrixIllConditioned(eigenvalues))
+ warning_at_msg(__file,__line,"K is ill conditioned!");
+ if (!isMatrixSymmetric(K))
+ fail_at_msg(strict,__file,__line,"K is not symmetric!" );
+ bool negative_values;
+ if (negative_values = has_negative_values_but_one(eigenvalues)) {
+ warning_at_msg(__file,__line,"K has more than one negative eigenvalues!");
+ }
+ if (count_small>=0 && negative_values && !isMatrixPositiveDefinite(K))
+ fail_at_msg(strict,__file,__line,"K is not positive definite!");
+ gsl_vector_free(eigenvalues);
}
}
diff --git a/src/debug.h b/src/debug.h
index 82dd245..f47cb4c 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -12,16 +12,21 @@ void gemma_gsl_error_handler (const char * reason,
// Validation routines
-void do_validate_K(const gsl_matrix *K, bool do_check, const char *__file, int __line);
+void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line);
#define ROUND(f) round(f * 10000.)/10000
-#define validate_K(K,check) do_validate_K(K,check,__FILE__,__LINE__)
+#define validate_K(K,check,strict) do_validate_K(K,check,strict,__FILE__,__LINE__)
#define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl;
-inline void fail_at_msg(const char *__file, int __line, const char *msg) {
- std::cerr << "**** FAIL: " << msg << " in " << __file << " at line " << __line << std::endl;
- exit(1);
+inline void fail_at_msg(bool strict, const char *__file, int __line, const char *msg) {
+ if (strict)
+ std::cerr << "**** STRICT FAIL: ";
+ else
+ std::cerr << "**** WARNING: ";
+ std::cerr << msg << " in " << __file << " at line " << __line << std::endl;
+ if (strict)
+ exit(1);
}
#if defined NDEBUG
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 4c22329..d3401a6 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -708,7 +708,8 @@ void GEMMA::PrintHelp(size_t option) {
if (option == 14) {
cout << " DEBUG OPTIONS" << endl;
- cout << " -no-checks disable checks" << endl;
+ cout << " -no-check disable checks" << endl;
+ cout << " -strict strict mode will stop when there is a problem" << endl;
cout << " -silence silent terminal display" << endl;
cout << " -debug debug output" << endl;
cout << " -nind [num] read up to num individuals" << endl;
@@ -1593,8 +1594,10 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
cPar.window_ns = atoi(str.c_str());
} else if (strcmp(argv[i], "-debug") == 0) {
cPar.mode_debug = true;
- } else if (strcmp(argv[i], "-no-checks") == 0) {
+ } else if (strcmp(argv[i], "-no-check") == 0) {
cPar.mode_check = false;
+ } else if (strcmp(argv[i], "-strict") == 0) {
+ cPar.mode_strict = true;
} else {
cout << "error! unrecognized option: " << argv[i] << endl;
cPar.error = true;
@@ -1730,7 +1733,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
cout << "error! fail to read kinship/relatedness file. " << endl;
return;
}
- // FIXME: this is strange, why read twice?
+ // This is not so elegant. Reads twice to select on idv and then cvt
ReadFile_kin(cPar.file_kin, cPar.indicator_cvt, cPar.mapID2num, cPar.k_mode,
cPar.error, G_full);
if (cPar.error == true) {
@@ -1741,20 +1744,12 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
CenterMatrix(G_full);
- validate_K(G,cPar.mode_check);
+ validate_K(G,cPar.mode_check,cPar.mode_strict);
// eigen-decomposition and calculate trace_G
cout << "Start Eigen-Decomposition..." << endl;
time_start = clock();
- cPar.trace_G = EigenDecomp(G, U, eval, 0);
- cPar.trace_G = 0.0;
- for (size_t i = 0; i < eval->size; i++) {
- if (gsl_vector_get(eval, i) < 1e-10) {
- gsl_vector_set(eval, i, 0);
- }
- cPar.trace_G += gsl_vector_get(eval, i);
- }
- cPar.trace_G /= (double)eval->size;
+ cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
cPar.time_eigen = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
// calculate UtW and Uty
@@ -1889,7 +1884,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
}
// Now we have the Kinship matrix test it
- validate_K(G,cPar.mode_check);
+ validate_K(G,cPar.mode_check,cPar.mode_strict);
if (cPar.a_mode == 21) {
cPar.WriteMatrix(G, "cXX");
@@ -2352,7 +2347,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check);
+ validate_K(G,cPar.mode_check,cPar.mode_strict);
(cPar.v_traceG).clear();
double d = 0;
@@ -2721,7 +2716,7 @@ return;}
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check);
+ validate_K(G,cPar.mode_check,cPar.mode_strict);
// is residual weights are provided, then
if (!cPar.file_weight.empty()) {
@@ -2750,9 +2745,9 @@ return;}
time_start = clock();
if (cPar.a_mode == 31) {
- cPar.trace_G = EigenDecomp(G, U, eval, 1);
+ cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 1);
} else {
- cPar.trace_G = EigenDecomp(G, U, eval, 0);
+ cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
}
if (!cPar.file_weight.empty()) {
@@ -2769,15 +2764,6 @@ return;}
}
}
- cPar.trace_G = 0.0;
- for (size_t i = 0; i < eval->size; i++) {
- if (gsl_vector_get(eval, i) < 1e-10) {
- gsl_vector_set(eval, i, 0);
- }
- cPar.trace_G += gsl_vector_get(eval, i);
- }
- cPar.trace_G /= (double)eval->size;
-
cPar.time_eigen =
(clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
} else {
@@ -3037,7 +3023,7 @@ return;}
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check);
+ validate_K(G,cPar.mode_check,cPar.mode_strict);
} else {
cPar.ReadGenotypes(UtX, G, true);
}
@@ -3045,15 +3031,9 @@ return;}
// eigen-decomposition and calculate trace_G
cout << "Start Eigen-Decomposition..." << endl;
time_start = clock();
- cPar.trace_G = EigenDecomp(G, U, eval, 0);
- cPar.trace_G = 0.0;
- for (size_t i = 0; i < eval->size; i++) {
- if (gsl_vector_get(eval, i) < 1e-10) {
- gsl_vector_set(eval, i, 0);
- }
- cPar.trace_G += gsl_vector_get(eval, i);
- }
- cPar.trace_G /= (double)eval->size;
+
+ cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
+
cPar.time_eigen =
(clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
@@ -3154,7 +3134,7 @@ return;}
// center matrix G
CenterMatrix(G);
- validate_K(G,cPar.mode_check);
+ validate_K(G,cPar.mode_check,cPar.mode_strict);
} else {
cPar.ReadGenotypes(UtX, G, true);
@@ -3163,15 +3143,7 @@ return;}
// eigen-decomposition and calculate trace_G
cout << "Start Eigen-Decomposition..." << endl;
time_start = clock();
- cPar.trace_G = EigenDecomp(G, U, eval, 0);
- cPar.trace_G = 0.0;
- for (size_t i = 0; i < eval->size; i++) {
- if (gsl_vector_get(eval, i) < 1e-10) {
- gsl_vector_set(eval, i, 0);
- }
- cPar.trace_G += gsl_vector_get(eval, i);
- }
- cPar.trace_G /= (double)eval->size;
+ cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
cPar.time_eigen =
(clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
diff --git a/src/lapack.cpp b/src/lapack.cpp
index 8f6e8ff..5b2fc56 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -23,8 +23,12 @@
#include <iostream>
#include <vector>
+#include "debug.h"
+#include "mathfunc.h"
+
using namespace std;
+/*
extern "C" void sgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K,
float *ALPHA, float *A, int *LDA, float *B, int *LDB,
float *BETA, float *C, int *LDC);
@@ -39,6 +43,7 @@ extern "C" void ssyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, float *A,
int *ISUPPZ, float *WORK, int *LWORK, int *IWORK,
int *LIWORK, int *INFO);
extern "C" double sdot_(int *N, float *DX, int *INCX, float *DY, int *INCY);
+*/
extern "C" void dgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K,
double *ALPHA, double *A, int *LDA, double *B, int *LDB,
@@ -55,6 +60,7 @@ extern "C" void dsyevr_(char *JOBZ, char *RANGE, char *UPLO, int *N, double *A,
int *LIWORK, int *INFO);
extern "C" double ddot_(int *N, double *DX, int *INCX, double *DY, int *INCY);
+/*
// Cholesky decomposition, A is destroyed.
void lapack_float_cholesky_decomp(gsl_matrix_float *A) {
int N = A->size1, LDA = A->size1, INFO;
@@ -75,6 +81,7 @@ void lapack_float_cholesky_decomp(gsl_matrix_float *A) {
return;
}
+*/
// Cholesky decomposition, A is destroyed.
void lapack_cholesky_decomp(gsl_matrix *A) {
@@ -97,6 +104,7 @@ void lapack_cholesky_decomp(gsl_matrix *A) {
return;
}
+/*
// Cholesky solve, A is decomposed.
void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
gsl_vector_float *x) {
@@ -118,6 +126,7 @@ void lapack_float_cholesky_solve(gsl_matrix_float *A, const gsl_vector_float *b,
return;
}
+*/
// Cholesky solve, A is decomposed.
void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) {
@@ -140,6 +149,7 @@ void lapack_cholesky_solve(gsl_matrix *A, const gsl_vector *b, gsl_vector *x) {
return;
}
+/*
void lapack_sgemm(char *TransA, char *TransB, float alpha,
const gsl_matrix_float *A, const gsl_matrix_float *B,
float beta, gsl_matrix_float *C) {
@@ -192,6 +202,7 @@ void lapack_sgemm(char *TransA, char *TransB, float alpha,
gsl_matrix_float_free(C_t);
return;
}
+*/
void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
const gsl_matrix *B, double beta, gsl_matrix *C) {
@@ -246,6 +257,7 @@ void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
return;
}
+/*
// Eigen value decomposition, matrix A is destroyed, float seems to
// have problem with large matrices (in mac).
void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
@@ -328,7 +340,10 @@ void lapack_float_eigen_symmv(gsl_matrix_float *A, gsl_vector_float *eval,
return;
}
-// Eigenvalue decomposition, matrix A is destroyed.
+*/
+
+// Eigenvalue decomposition, matrix A is destroyed. Returns eigenvalues in
+// 'eval'. Also returns matrix 'evec' (U).
void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
const size_t flag_largematrix) {
if (flag_largematrix == 1) {
@@ -409,21 +424,45 @@ void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
return;
}
-// DO NOT set eigenvalues to be positive.
+// Does NOT set eigenvalues to be positive. G gets destroyed. Returns
+// eigen trace and values in U and eval (eigenvalues).
double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
const size_t flag_largematrix) {
lapack_eigen_symmv(G, eval, U, flag_largematrix);
// Calculate track_G=mean(diag(G)).
double d = 0.0;
- for (size_t i = 0; i < eval->size; ++i) {
+ for (size_t i = 0; i < eval->size; ++i)
+ d += gsl_vector_get(eval, i);
+
+ d /= (double)eval->size;
+
+ return d;
+}
+
+// Same as EigenDecomp but zeroes eigenvalues close to zero. When
+// negative eigenvalues remain a warning is issued.
+double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
+ const size_t flag_largematrix) {
+ EigenDecomp(G,U,eval,flag_largematrix);
+ auto d = 0.0;
+ int count_negative_eigenvalues = 0;
+ for (size_t i = 0; i < eval->size; i++) {
+ if (std::abs(gsl_vector_get(eval, i)) < EIGEN_MINVALUE)
+ gsl_vector_set(eval, i, 0.0);
+ if (gsl_vector_get(eval,i) < 0.0)
+ count_negative_eigenvalues += 1;
d += gsl_vector_get(eval, i);
}
d /= (double)eval->size;
+ if (count_negative_eigenvalues > 0) {
+ warning_msg("Matrix G has more than one negative eigenvalues!");
+ }
return d;
}
+/*
// DO NOT set eigen values to be positive.
double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
gsl_vector_float *eval, const size_t flag_largematrix) {
@@ -438,6 +477,7 @@ double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
return d;
}
+*/
double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) {
double logdet_O = 0.0;
@@ -452,6 +492,7 @@ double CholeskySolve(gsl_matrix *Omega, gsl_vector *Xty, gsl_vector *OiXty) {
return logdet_O;
}
+/*
double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
gsl_vector_float *OiXty) {
double logdet_O = 0.0;
@@ -465,6 +506,7 @@ double CholeskySolve(gsl_matrix_float *Omega, gsl_vector_float *Xty,
return logdet_O;
}
+*/
// LU decomposition.
void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) {
@@ -472,6 +514,7 @@ void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) {
return;
}
+/*
void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
@@ -496,6 +539,7 @@ void LUDecomp(gsl_matrix_float *LU, gsl_permutation *p, int *signum) {
gsl_matrix_free(LU_double);
return;
}
+*/
// LU invert.
void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
@@ -504,6 +548,7 @@ void LUInvert(const gsl_matrix *LU, const gsl_permutation *p,
return;
}
+/*
void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
gsl_matrix_float *inverse) {
gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
@@ -532,6 +577,8 @@ void LUInvert(const gsl_matrix_float *LU, const gsl_permutation *p,
return;
}
+*/
+
// LU lndet.
double LULndet(gsl_matrix *LU) {
double d;
@@ -539,6 +586,7 @@ double LULndet(gsl_matrix *LU) {
return d;
}
+/*
double LULndet(gsl_matrix_float *LU) {
gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
double d;
@@ -557,6 +605,7 @@ double LULndet(gsl_matrix_float *LU) {
gsl_matrix_free(LU_double);
return d;
}
+*/
// LU solve.
void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
@@ -565,6 +614,7 @@ void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
return;
}
+/*
void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
const gsl_vector_float *b, gsl_vector_float *x) {
gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
@@ -600,7 +650,7 @@ void LUSolve(const gsl_matrix_float *LU, const gsl_permutation *p,
gsl_vector_free(x_double);
return;
}
-
+*/
bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) {
bool flag = false;
int incx = 1;
@@ -614,6 +664,7 @@ bool lapack_ddot(vector<double> &x, vector<double> &y, double &v) {
return flag;
}
+/*
bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) {
bool flag = false;
int incx = 1;
@@ -626,3 +677,4 @@ bool lapack_sdot(vector<float> &x, vector<float> &y, double &v) {
return flag;
}
+*/
diff --git a/src/lapack.h b/src/lapack.h
index ff02b96..9596667 100644
--- a/src/lapack.h
+++ b/src/lapack.h
@@ -41,6 +41,8 @@ void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
double EigenDecomp(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
const size_t flag_largematrix);
+double EigenDecomp_Zeroed(gsl_matrix *G, gsl_matrix *U, gsl_vector *eval,
+ const size_t flag_largematrix);
double EigenDecomp(gsl_matrix_float *G, gsl_matrix_float *U,
gsl_vector_float *eval, const size_t flag_largematrix);
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 3b70102..7e10f5a 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -254,59 +254,62 @@ gsl_vector *getEigenValues(const gsl_matrix *G) {
// Check whether eigen values are larger than *min*
// by default 1E-5.
-// Returns success, eigen min, eigen max
+// Returns success, eigen min, eigen min-but-1, eigen max
-tuple<double, double> abs_minmax(const gsl_vector *v) {
- auto min = std::abs(v->data[0]);
- auto max = std::abs(v->data[0]);
+tuple<double, double, double> abs_minmax(const gsl_vector *v) {
+ auto min = std::abs(v->data[0]);
+ auto min1 = std::abs(v->data[0]);
+ auto max = std::abs(v->data[0]);
for (auto i=0; i<v->size; i++) {
auto value = std::abs(v->data[i]);
- if (value < min)
+ if (value < min) {
+ min1 = min;
min = value;
+ }
if (value > max)
max = value;
}
- return std::make_tuple(min, max);
+ return std::make_tuple(min, min1, max);
}
-bool has_negative_values(const gsl_vector *v) {
+// Check for negative values. skip_min will leave out
+// the lowest value
+bool has_negative_values_but_one(const gsl_vector *v) {
+ bool one_skipped = false;
for (auto i=0; i<v->size; i++) {
if (v->data[i] < 0.0) {
- return true;
+ if (one_skipped)
+ return true;
+ one_skipped = true;
}
}
return false;
}
-bool checkMatrixEigen(const gsl_matrix *G, double min) {
- auto eigenvalues = getEigenValues(G);
- bool ret_valid = true;
-
- if (has_negative_values(eigenvalues))
- ret_valid = false;
-
- auto t = abs_minmax(eigenvalues);
- auto absmin = get<0>(t);
- if (absmin < min)
- ret_valid = false;
- gsl_vector_free(eigenvalues);
- return ret_valid;
+uint count_small_values(const gsl_vector *v, double min) {
+ uint count = 0;
+ for (auto i=0; i<v->size; i++) {
+ if (v->data[i] < min)
+ count += 1;
+ }
+ return count;
}
-bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio) {
+// Check for matrix being ill conditioned by taking the eigen values
+// and the ratio of max and min but one (min is expected to be zero).
+bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
bool ret_valid = true;
- auto eigenvalues = getEigenValues(G);
auto t = abs_minmax(eigenvalues);
auto absmin = get<0>(t);
- auto absmax = get<1>(t);
- if (absmax/absmin > max_ratio) {
+ auto absmin1 = get<1>(t);
+ auto absmax = get<2>(t);
+ if (absmax/absmin1 > max_ratio) {
#if !NDEBUG
- cerr << "**** DEBUG: Eigenvalues Min " << absmin << " Max " << absmax << " Ratio " << absmax/absmin << endl;
+ cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl;
#endif
ret_valid = false;
}
- gsl_vector_free(eigenvalues);
return ret_valid;
}
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 8a2ea64..6e20b37 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -23,6 +23,9 @@
#include "gsl/gsl_matrix.h"
#include "gsl/gsl_vector.h"
+#define CONDITIONED_MAXRATIO 2e+6 // based on http://mathworld.wolfram.com/ConditionNumber.html
+#define EIGEN_MINVALUE 1e-10
+
using namespace std;
using namespace Eigen;
@@ -34,10 +37,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w);
void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
void StandardizeMatrix(gsl_matrix *G);
double ScaleMatrix(gsl_matrix *G);
+bool has_negative_values_but_one(const gsl_vector *v);
+uint count_small_values(const gsl_vector *v, double min);
bool isMatrixPositiveDefinite(const gsl_matrix *G);
-bool isMatrixIllConditioned(const gsl_matrix *G, double max_ratio=4.0);
+bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio=CONDITIONED_MAXRATIO);
bool isMatrixSymmetric(const gsl_matrix *G);
-bool checkMatrixEigen(const gsl_matrix *G, double min=1e-5);
+gsl_vector *getEigenValues(const gsl_matrix *G);
double SumVector(const gsl_vector *v);
double CenterVector(gsl_vector *y);
void CenterVector(gsl_vector *y, const gsl_matrix *W);
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 358038f..be9fd78 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -257,14 +257,7 @@ double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, V_e_hi, VgVehi, 0.0, Lambda);
// Eigen decomposition of Lambda.
- EigenDecomp(Lambda, U_l, D_l, 0);
-
- for (size_t i = 0; i < d_size; i++) {
- d = gsl_vector_get(D_l, i);
- if (d < 0) {
- gsl_vector_set(D_l, i, 0);
- }
- }
+ EigenDecomp_Zeroed(Lambda, U_l, D_l, 0);
// Calculate UltVeh and UltVehi.
gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, U_l, V_e_h, 0.0, UltVeh);
diff --git a/src/param.h b/src/param.h
index 249bf02..08b1e10 100644
--- a/src/param.h
+++ b/src/param.h
@@ -112,7 +112,8 @@ public:
class PARAM {
public:
// IO-related parameters
- bool mode_check = true;
+ bool mode_check = true; // run data checks (slower)
+ bool mode_strict = false; // exit on some data checks
bool mode_silence;
bool mode_debug = false;
uint issue; // enable tests for issue on github tracker
diff --git a/test/src/unittests-math.cpp b/test/src/unittests-math.cpp
index 11aadf6..ac4c180 100644
--- a/test/src/unittests-math.cpp
+++ b/test/src/unittests-math.cpp
@@ -16,21 +16,21 @@ TEST_CASE( "Math functions", "[math]" ) {
copy(data, data+9, m->data);
REQUIRE( isMatrixPositiveDefinite(m) );
REQUIRE( isMatrixSymmetric(m) );
- REQUIRE( checkMatrixEigen(m,0.001) );
+ // REQUIRE( checkMatrixEigen(m,0.001) );
double data1[] = {1.0,0,0,
0,3.0,0,
0,0,2.0};
copy(data1, data1+9, m->data);
REQUIRE( isMatrixPositiveDefinite(m) );
- REQUIRE( checkMatrixEigen(m) );
+ // REQUIRE( checkMatrixEigen(m) );
double data2[] = {1,1,1,
1,1,1,
1,1,0.5};
copy(data2, data2+9, m->data);
REQUIRE( !isMatrixPositiveDefinite(m));
- REQUIRE( !checkMatrixEigen(m) );
+ // REQUIRE( !checkMatrixEigen(m) );
double data3[] = {1.0, 0, 0,
3.0,3.0, 0,
@@ -38,7 +38,7 @@ TEST_CASE( "Math functions", "[math]" ) {
copy(data3, data3+9, m->data);
REQUIRE( !isMatrixPositiveDefinite(m) );
REQUIRE( !isMatrixSymmetric(m) );
- REQUIRE( !checkMatrixEigen(m) );
+ // REQUIRE( checkMatrixEigen(m) );
// ---- NaN checks
vector<double> v = {1.0, 2.0};
diff --git a/test/test_suite.sh b/test/test_suite.sh
index 0991c63..44eb14c 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -8,9 +8,10 @@ testCenteredRelatednessMatrixKFullLOCO1() {
-p ../example/mouse_hs1940.pheno.txt \
-a ../example/mouse_hs1940.anno.txt \
-loco 1 -gk -debug -o $outn
- assertEquals 1 $?
- # assertEquals "1940" `wc -l < $outfn`
- # assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals 0 $?
+ outfn=output/$outn.cXX.txt
+ assertEquals "1940" `wc -l < $outfn`
+ assertEquals "2246.57" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testUnivariateLinearMixedModelFullLOCO1() {
@@ -24,26 +25,24 @@ testUnivariateLinearMixedModelFullLOCO1() {
-lmm \
-debug \
-o $outn
- assertEquals 1 $?
- # grep "total computation time" < output/$outn.log.txt
- # assertEquals 0 $?
- # outfn=output/$outn.assoc.txt
- # assertEquals "951" `wc -l < $outfn`
- # assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals 0 $?
+ grep "total computation time" < output/$outn.log.txt
+ assertEquals 0 $?
+ outfn=output/$outn.assoc.txt
+ assertEquals "951" `wc -l < $outfn`
+ assertEquals "267509369.79" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testCenteredRelatednessMatrixK() {
$gemma -g ../example/mouse_hs1940.geno.txt.gz \
-p ../example/mouse_hs1940.pheno.txt \
-gk -o mouse_hs1940
- assertEquals 1 $?
- # grep "total computation time" < output/mouse_hs1940.log.txt
- # assertEquals 1 $?
- # outfn=output/mouse_hs1940.cXX.txt
- # assertEquals "1940" `wc -l < $outfn`
- # assertEquals "3763600" `wc -w < $outfn`
- # assertEquals "0.335" `head -c 5 $outfn`
- # assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals 0 $?
+ outfn=output/mouse_hs1940.cXX.txt
+ assertEquals "1940" `wc -l < $outfn`
+ assertEquals "3763600" `wc -w < $outfn`
+ assertEquals "0.335" `head -c 5 $outfn`
+ assertEquals "1119.64" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testUnivariateLinearMixedModel() {
@@ -54,12 +53,12 @@ testUnivariateLinearMixedModel() {
-k ./output/mouse_hs1940.cXX.txt \
-lmm \
-o mouse_hs1940_CD8_lmm
- assertEquals 1 $?
- # grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
- # assertEquals 0 $?
- # outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
- # assertEquals "118459" `wc -w < $outfn`
- # assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals 0 $?
+ grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
+ assertEquals 0 $?
+ outfn=output/mouse_hs1940_CD8_lmm.assoc.txt
+ assertEquals "118459" `wc -w < $outfn`
+ assertEquals "4038557453.62" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testMultivariateLinearMixedModel() {
@@ -69,11 +68,11 @@ testMultivariateLinearMixedModel() {
-a ../example/mouse_hs1940.anno.txt \
-k ./output/mouse_hs1940.cXX.txt \
-lmm -o mouse_hs1940_CD8MCH_lmm
- assertEquals 1 $?
+ assertEquals 0 $?
- # outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt
- # assertEquals "139867" `wc -w < $outfn`
- # assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt
+ assertEquals "139867" `wc -w < $outfn`
+ assertEquals "4029037056.54" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
testPlinkStandardRelatednessMatrixK() {
@@ -83,9 +82,9 @@ testPlinkStandardRelatednessMatrixK() {
rm -f $outfn
$gemma -bfile $datadir/HLC \
-gk 2 -o $testname
- assertEquals 1 $?
- # assertEquals "427" `wc -l < $outfn`
- # assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals 0 $?
+ assertEquals "427" `wc -l < $outfn`
+ assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
# Test for https://github.com/genetics-statistics/GEMMA/issues/58
@@ -99,10 +98,10 @@ testPlinkMultivariateLinearMixedModel() {
-maf 0.1 \
-c $datadir/HLC_covariates.txt \
-o $testname
- assertEquals 1 $?
- # outfn=output/$testname.assoc.txt
- # assertEquals "223243" `wc -l < $outfn`
- # assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+ assertEquals 0 $?
+ outfn=output/$testname.assoc.txt
+ assertEquals "223243" `wc -l < $outfn`
+ assertEquals "89756559859.06" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
}
shunit2=`which shunit2`