aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--INSTALL.md55
-rw-r--r--src/lapack.cpp21
-rw-r--r--src/mathfunc.cpp2
3 files changed, 35 insertions, 43 deletions
diff --git a/INSTALL.md b/INSTALL.md
index 5c7f4bf..2a0512f 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -118,40 +118,53 @@ To link a new version, compile OpenBlas as per
[instructions](http://www.openblas.net/). You can start with the
default:
- make BINARY=64 INTERFACE64=1 NO_WARMUP=1 USE_THREAD=0 NO_LAPACK=0 NO_SHARED=1 -j 4
-
-This builds a 64-bit binary and API and no external LAPACK. This is a very conservative
-setting for testing the 64-bit API.
-
-Note, for performance we want a 64-bit binary with threading.
+ make
and/or play with the switches (listed in OpenBlas Makefile.rule)
- make BINARY=64 NO_WARMUP=0 GEMM_MULTITHREAD_THRESHOLD=4 USE_THREAD=1 NO_AFFINITY=0 NO_LAPACK=0 NUM_THREADS=64 INTERFACE64=1 ONLY_CBLAS=1 NO_SHARED=1 -j 4
+ make BINARY=64 NO_WARMUP=0 GEMM_MULTITHREAD_THRESHOLD=4 USE_THREAD=1 NO_AFFINITY=0 NO_LAPACK=1 NUM_THREADS=64 NO_SHARED=1
and you should see something like
OpenBLAS build complete. (BLAS CBLAS)
- OS ... Linux
- Architecture ... x86_64
- BINARY ... 64bit
- Use 64 bits int (equivalent to "-i8" in Fortran)
- C compiler ... GCC (command line : gcc)
- Library Name ... libopenblas_haswellp-r0.3.0.dev.a (Multi threaded; Max num-threads is 32)
+ OS ... Linux
+ Architecture ... x86_64
+ BINARY ... 64bit
+ C compiler ... GCC (command line : gcc)
+ Fortran compiler ... GFORTRAN (command line : gfortran)
+ Library Name ... libopenblas_haswellp-r0.3.0.dev.a (Multi threaded; Max num-threads is 64)
-Note that OpenBlas by default uses 32-bit integers which can overflow
-with large matrix sizes. Using INTERFACE=1 will fix that.
+Note that OpenBlas by default uses a 32-bit integer API which can
+overflow with large matrix sizes. We don't include LAPACK - the
+OpenBlas version gives problems around eigenvalues for some reason.
-This generates a static library which you can link using the full path
+We now have a static library which you can link using the full path
with using the GEMMA Makefile:
- time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -lgslcblas -pthread -lz -llapack" WITH_OPENBLAS=1 -j 4 fast-check
-
- make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -lgslcblas -pthread -lz -llapack" WITH_OPENBLAS=1 -j 4 unittests
-
-NOTE: we should make this easier.
+ time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -lgslcblas -pthread -lz -llapack" WITH_OPENBLAS=1 -j 4 unittests
Latest (INT64, no gslcblas):
time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/opt/gsl2/lib/libgsl.a ~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -pthread -lz -llapack" WITH_OPENBLAS=1 OPENBLAS_INCLUDE_PATH=~/tmp/OpenBLAS/ -j 4 fast-check
+
+
+### OpenBlas 64-bit API
+
+<i>Warning: This is work in progress (WIP)</i>
+
+OpenBlas supports a 64-bit API which allows for large matrices. Unfortunately
+GEMMA does not support it yet, see https://github.com/genetics-statistics/GEMMA/issues/120
+
+For testing we can build
+
+ make BINARY=64 INTERFACE64=1 NO_WARMUP=1 USE_THREAD=0 NO_LAPACK=0 NO_SHARED=1 -j 4
+
+This builds a 64-bit binary and API and no external LAPACK. This is a very conservative
+setting for testing the 64-bit API.
+
+Note, for performance we want a 64-bit binary with threading.
+
+ make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/opt/gsl2/lib/libgsl.a ~/tmp/OpenBLAS/libopenblas_haswell-r0.3.0.dev.a ~/.guix-profile/lib/libgfortran.a ~/.guix-profile/lib/libquadmath.a -pthread -lz" WITH_OPENBLAS=1 OPENBLAS_INCLUDE_PATH=~/tmp/OpenBLAS/ -j 4 fast-check
+
+Note we don't include standard lapack, because it is 32-bits.
diff --git a/src/lapack.cpp b/src/lapack.cpp
index 26333b6..d15446b 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -306,27 +306,6 @@ double LULndet(const gsl_matrix *LU) {
return gsl_linalg_LU_lndet((gsl_matrix *)LU);
}
-/*
-double LULndet(gsl_matrix_float *LU) {
- gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
- double d;
-
- // Copy float matrix to double.
- for (size_t i = 0; i < LU->size1; i++) {
- for (size_t j = 0; j < LU->size2; j++) {
- gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
- }
- }
-
- // LU decomposition.
- d = gsl_linalg_LU_lndet(LU_double);
-
- // Free matrix
- gsl_matrix_free(LU_double);
- return d;
-}
-*/
-
// LU solve.
void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
const gsl_vector *b, gsl_vector *x) {
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 1f24e74..644b3e8 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -350,7 +350,7 @@ bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
auto absmax = get<2>(t);
if (absmax/absmin1 > max_ratio) {
#if !NDEBUG
- cerr << "**** DEBUG: Ratio suggests matrix is ill conditioned" << endl;
+ cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl;
auto t = minmax(eigenvalues);
auto min = get<0>(t);
auto min1 = get<1>(t);