Some cleanup on matrix checking

author: Pjotr Prins 2017-12-06 16:36:11 +0000
committer: Pjotr Prins 2017-12-06 16:36:11 +0000
commit: 29df576cca8052e4aa9142ce1bb0ab490b864976 (patch)
tree: f29ec0245a2cd58f9c598d8b3dae53144cc11887
parent: 996f70910c675e249fac753273b11555b1b7a4e3 (diff)
download: pangemma-29df576cca8052e4aa9142ce1bb0ab490b864976.tar.gz
9 files changed, 56 insertions, 30 deletions
diff --git a/INSTALL.md b/INSTALL.md
index f0426e8..eed88e6 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -119,20 +119,23 @@ To link a new version, compile OpenBlas as per
 
     make -j 4
 
-or play with the switches
+and/or play with the switches
 
-    make USE_THREAD=1 NUM_THREADS=16 -j 4
+    make BINARY=64 NO_WARMUP=0 GEMM_MULTITHREAD_THRESHOLD=4 USE_THREAD=1 NO_AFFINITY=1 NUM_THREADS=64 INTERFACE64=1 -j 4
 
-rendering for example:
+and you should see something like
 
-        OpenBLAS build complete. (BLAS CBLAS)
-        OS               ... Linux
-        Architecture     ... x86_64
-        BINARY           ... 64bit
-        C compiler       ... GCC  (command line : gcc)
-        Library Name     ... libopenblas_haswellp-r0.3.0.dev.a (Multi threaded; Max num-threads is 16)
+    OpenBLAS build complete. (BLAS CBLAS)
 
-        To install the library, you can run "make PREFIX=/path/to/your/installation install".
+    OS               ... Linux
+    Architecture     ... x86_64
+    BINARY           ... 64bit
+    Use 64 bits int    (equivalent to "-i8" in Fortran)
+    C compiler       ... GCC  (command line : gcc)
+    Library Name     ... libopenblas_haswellp-r0.3.0.dev.a (Multi threaded; Max num-threads is 32)
+
+Note that OpenBlas by default uses 32-bit integers which can overflow
+with large matrix sizes. Using INTERFACE=1 will fix that.
 
 This generates a static library which you can link using the full path
 with using the GEMMA Makefile:
@@ -142,3 +145,7 @@ with using the GEMMA Makefile:
     make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -lgslcblas -pthread -lz  -llapack" WITH_OPENBLAS=1 -j 4 unittests
 
 NOTE: we should make this easier.
+
+Latest (INT64, no gslcblas):
+
+    time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/opt/gsl2/lib/libgsl.a ~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -pthread -lz  -llapack" WITH_OPENBLAS=1 OPENBLAS_INCLUDE_PATH=~/tmp/OpenBLAS/ -j 4 fast-check
diff --git a/src/debug.cpp b/src/debug.cpp
index cb98f27..4e57e3e 100644
--- a/src/debug.cpp
+++ b/src/debug.cpp
@@ -134,14 +134,14 @@ void do_validate_K(const gsl_matrix *K, const char *__pretty_function, const cha
   if (is_check_mode()) {
     // debug_msg("Validating K");
     auto eigenvalues = getEigenValues(K);
-    const uint count_small = count_small_values(eigenvalues,EIGEN_MINVALUE);
+    const uint count_small = count_abs_small_values(eigenvalues,EIGEN_MINVALUE);
     if (count_small>1) {
       std::string msg = "K has ";
       msg += std::to_string(count_small);
       msg += " eigenvalues close to zero";
       warning_at_msg(__file,__line,msg);
     }
-    if (!isMatrixIllConditioned(eigenvalues))
+    if (isMatrixIllConditioned(eigenvalues))
       warning_at_msg(__file,__line,"K is ill conditioned!");
     if (!isMatrixSymmetric(K))
       warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,"K is not symmetric!" );
diff --git a/src/fastblas.cpp b/src/fastblas.cpp
index 8d4a313..a7adc44 100644
--- a/src/fastblas.cpp
+++ b/src/fastblas.cpp
@@ -129,6 +129,7 @@ void fast_cblas_dgemm(const enum CBLAS_ORDER Order,
   enforce(N>0);
   enforce(K>0);
 
+  // cout << sizeof(blasint) << endl;
   blasint mi = M;
   blasint ni = N;
   blasint ki = K;
diff --git a/src/io.cpp b/src/io.cpp
index 7e682b3..625a6a2 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -40,7 +40,7 @@
 #include "gsl/gsl_vector.h"
 
 #include "debug.h"
-#include "eigenlib.h"
+// #include "eigenlib.h"
 #include "fastblas.h"
 #include "gzstream.h"
 #include "io.h"
diff --git a/src/lm.cpp b/src/lm.cpp
index b94a426..9132e81 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -39,7 +39,7 @@
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_roots.h"
 
-#include "eigenlib.h"
+// #include "eigenlib.h"
 #include "gzstream.h"
 #include "lapack.h"
 #include "lm.h"
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 0102ac0..ecc8231 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -38,7 +38,7 @@
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
 
-#include "eigenlib.h"
+// #include "eigenlib.h"
 
 #include "gzstream.h"
 #include "io.h"
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 91c0271..1f24e74 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -49,7 +49,7 @@
 #include "gsl/gsl_eigen.h"
 
 #include "debug.h"
-#include "eigenlib.h"
+// #include "eigenlib.h"
 #include "fastblas.h"
 #include "lapack.h"
 #include "mathfunc.h"
@@ -285,11 +285,27 @@ gsl_vector *getEigenValues(const gsl_matrix *G) {
 // by default 1E-5.
 // Returns success, eigen min, eigen min-but-1, eigen max
 
+tuple<double, double, double> minmax(const gsl_vector *v) {
+  auto min  = v->data[0];
+  auto min1 = min;
+  auto max  = min;
+  for (size_t i=1; i<v->size; i++) {
+    auto value = std::abs(v->data[i]);
+    if (value < min) {
+      min1 = min;
+      min = value;
+    }
+    if (value > max)
+      max = value;
+  }
+  return std::make_tuple(min, min1, max);
+}
+
 tuple<double, double, double> abs_minmax(const gsl_vector *v) {
   auto min  = std::abs(v->data[0]);
-  auto min1 = std::abs(v->data[0]);
-  auto max  = std::abs(v->data[0]);
-  for (size_t i=0; i<v->size; i++) {
+  auto min1 = min;
+  auto max  = min;
+  for (size_t i=1; i<v->size; i++) {
     auto value = std::abs(v->data[i]);
     if (value < min) {
       min1 = min;
@@ -315,11 +331,12 @@ bool has_negative_values_but_one(const gsl_vector *v) {
   return false;
 }
 
-uint count_small_values(const gsl_vector *v, double min) {
+uint count_abs_small_values(const gsl_vector *v, double min) {
   uint count = 0;
   for (size_t i=0; i<v->size; i++) {
-    if (v->data[i] < min)
+    if (std::abs(v->data[i]) < min) {
       count += 1;
+    }
   }
   return count;
 }
@@ -327,19 +344,23 @@ uint count_small_values(const gsl_vector *v, double min) {
 // Check for matrix being ill conditioned by taking the eigen values
 // and the ratio of max and min but one (min is expected to be zero).
 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
-  bool ret_valid = true;
-
   auto t = abs_minmax(eigenvalues);
   auto absmin = get<0>(t);
   auto absmin1 = get<1>(t);
   auto absmax = get<2>(t);
   if (absmax/absmin1 > max_ratio) {
     #if !NDEBUG
-    cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl;
+    cerr << "**** DEBUG: Ratio suggests matrix is ill conditioned" << endl;
+    auto t = minmax(eigenvalues);
+    auto min = get<0>(t);
+    auto min1 = get<1>(t);
+    auto max = get<2>(t);
+    cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl;
+    cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl;
     #endif
-    ret_valid = false;
+    return true;
   }
-  return ret_valid;
+  return false;
 }
 
 double sum(const double *m, size_t rows, size_t cols) {
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 0d791a1..481d022 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -45,7 +45,7 @@ void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
 void StandardizeMatrix(gsl_matrix *G);
 double ScaleMatrix(gsl_matrix *G);
 bool has_negative_values_but_one(const gsl_vector *v);
-uint count_small_values(const gsl_vector *v, double min);
+uint count_abs_small_values(const gsl_vector *v, double min);
 bool isMatrixPositiveDefinite(const gsl_matrix *G);
 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio=CONDITIONED_MAXRATIO);
 bool isMatrixSymmetric(const gsl_matrix *G);
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index 29f797a..95a59d7 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -29,16 +29,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_integration.h"
 #include "gsl/gsl_linalg.h"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
 
-#include "eigenlib.h"
 #include "fastblas.h"
 #include "gzstream.h"
 #include "io.h"
author	Pjotr Prins	2017-12-06 16:36:11 +0000
committer	Pjotr Prins	2017-12-06 16:36:11 +0000
commit	29df576cca8052e4aa9142ce1bb0ab490b864976 (patch)
tree	f29ec0245a2cd58f9c598d8b3dae53144cc11887
parent	996f70910c675e249fac753273b11555b1b7a4e3 (diff)
download	pangemma-29df576cca8052e4aa9142ce1bb0ab490b864976.tar.gz