9 files changed, 72 insertions, 11 deletions
diff --git a/VERSION b/VERSION
index 4ab938a..f4df5d0 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.98-pre2
+0.98-beta1
diff --git a/src/debug.cpp b/src/debug.cpp
index 2f1f861..9f9f862 100644
--- a/src/debug.cpp
+++ b/src/debug.cpp
@@ -137,7 +137,7 @@ void enable_segfpe() {
   if (!is_fpe_check_mode() || is_legacy_mode()) return;
   #ifdef __GNUC__
     #if defined(__x86_64__)
-       debug_msg("enable segfpe hardware floating point error detection");
+  // debug_msg("enable segfpe hardware floating point error detection");
        signal(SIGFPE, sighandler);
        feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
     #endif
@@ -148,7 +148,7 @@ void disable_segfpe() {
   if (!is_fpe_check_mode() || is_legacy_mode()) return;
   #ifdef __GNUC__
     #if defined(__x86_64__)
-      debug_msg("disable segfpe");
+  // debug_msg("disable segfpe");
       fedisableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
     #endif
   #endif
diff --git a/src/debug.h b/src/debug.h
index e83c813..a665a37 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -115,6 +115,9 @@ inline void fail_msg(std::string msg) {
   std::raise(SIGINT); // keep stack trace for gdb
 }
 
+#define info_msg(msg) cerr << "**** INFO: " << msg << "." << endl;
+#define msg(msg) info_msg(msg);
+
 #if defined NDEBUG
   #define __SHOW_FUNC __func__
 
diff --git a/src/lapack.cpp b/src/lapack.cpp
index 125e5a0..79d49fd 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -319,7 +319,7 @@ void LUDecomp(gsl_matrix *LU, gsl_permutation *p, int *signum) {
 void LUInvert(const gsl_matrix *LU, const gsl_permutation *p, gsl_matrix *ret_inverse) {
   // debug_msg("entering");
   auto det = LULndet(LU);
-  assert(det != 1.0);
+  enforce_msg(det != 1.0,"LU determinant is zero -> LU is not invertable");
 
   enforce_gsl(gsl_linalg_LU_invert(LU, p, ret_inverse));
 }
diff --git a/src/lmm.cpp b/src/lmm.cpp
index d08c90e..80372ee 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -296,7 +296,8 @@ void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval,
   write(Hi_eval,"Hi_eval");
   write(Uab,"Uab");
   // write(ab,"ab");
-  assert(!has_nan(Hi_eval));
+  if (is_check_mode())
+    assert(!has_nan(Hi_eval));
   assert(!has_nan(Uab));
   // assert(!has_nan(ab));
 
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index d877302..faea21e 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -321,12 +321,15 @@ double CalcQi(const gsl_vector *eval, const gsl_vector *D_l,
 }
 
 // xHiy=\sum_{k=1}^n x_k\otimes ((delta_k*Dl+I)^{-1}Ul^TVe^{-1/2}y.
+//
+// FIXME: mvlmm spends a massive amount of time here
 void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
               const gsl_matrix *X, const gsl_matrix *UltVehiY,
               gsl_vector *xHiy) {
+  debug_msg("enter");
   size_t n_size = eval->size, c_size = X->size1, d_size = D_l->size;
 
-  gsl_vector_set_zero(xHiy);
+  // gsl_vector_set_zero(xHiy);
 
   double x, delta, dl, y, d;
   for (size_t i = 0; i < d_size; i++) {
@@ -342,6 +345,7 @@ void CalcXHiY(const gsl_vector *eval, const gsl_vector *D_l,
       gsl_vector_set(xHiy, j * d_size + i, d);
     }
   }
+  // debug_msg("exit");
 
   return;
 }
diff --git a/src/param.cpp b/src/param.cpp
index 6537379..de0c257 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -1942,8 +1942,7 @@ void PARAM::CheckCvt() {
     indicator_cvt.clear();
     n_cvt = 1;
   } else if (flag_ipt == 0) {
-    cout << "no intercept term is found in the cvt file. "
-         << "a column of 1s is added." << endl;
+    info_msg("no intercept term is found in the cvt file: a column of 1s is added");
     for (vector<int>::size_type i = 0; i < indicator_idv.size(); ++i) {
       if (indicator_idv[i] == 0 || indicator_cvt[i] == 0) {
         continue;
diff --git a/src/version.h b/src/version.h
index 518a9f3..5afd64d 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1,5 +1,5 @@
 // version.h generated by GEMMA scripts/gen_version_info.sh
-#define GEMMA_VERSION "0.98-pre2"
-#define GEMMA_DATE "2018-07-14"
+#define GEMMA_VERSION "0.98-beta1"
+#define GEMMA_DATE "2018-09-06"
 #define GEMMA_YEAR "2018"
-#define GEMMA_PROFILE "/gnu/store/9ahrb1swr06kjm2gr2zg0fsyvps3xqgz-profile"
+#define GEMMA_PROFILE "/gnu/store/8h46wb8hi8n492i2vlbfcz4dm44xwlpc-profile"
diff --git a/test/performance/releases.org b/test/performance/releases.org
index b54a4cf..4497d2f 100644
--- a/test/performance/releases.org
+++ b/test/performance/releases.org
@@ -10,6 +10,60 @@ core Eigenlib version and 0.97 went multi-core with
 openblas. Unfortunately I linked in lapack and an older BLAS which
 slowed things down. In 0.98 openblas is mostly used and is faster.
 
+* GEMMA 0.98-beta1
+
+#+BEGIN_SRC bash
+        linux-vdso.so.1 (0x00007ffe475d2000)
+        libgsl.so.23 => /home/wrk/opt/gemma-dev-env/lib/libgsl.so.23 (0x00007f95a21e3000)
+        libopenblas.so.0 => /home/wrk/opt/gemma-dev-env/lib/libopenblas.so.0 (0x00007f959fc45000)
+        libz.so.1 => /home/wrk/opt/gemma-dev-env/lib/libz.so.1 (0x00007f959fa2a000)
+        libgfortran.so.3 => /home/wrk/opt/gemma-dev-env/lib/libgfortran.so.3 (0x00007f959f709000)
+        libquadmath.so.0 => /home/wrk/opt/gemma-dev-env/lib/libquadmath.so.0 (0x00007f959f4c8000)
+        libstdc++.so.6 => /home/wrk/opt/gemma-dev-env/lib/libstdc++.so.6 (0x00007f959f14d000)
+        libm.so.6 => /home/wrk/opt/gemma-dev-env/lib/libm.so.6 (0x00007f959ee01000)
+        libgcc_s.so.1 => /home/wrk/opt/gemma-dev-env/lib/libgcc_s.so.1 (0x00007f959ebea000)
+        libpthread.so.0 => /home/wrk/opt/gemma-dev-env/lib/libpthread.so.0 (0x00007f959e9cc000)
+        libc.so.6 => /home/wrk/opt/gemma-dev-env/lib/libc.so.6 (0x00007f959e61a000)
+#+END_SRC
+
+#+BEGIN_SRC bash
+time ./bin/gemma -g ~/tmp/mouse_hs1940/mouse_hs1940.geno.txt.gz -p ~/tmp/mouse_hs1940/mouse_hs1940.pheno.txt -a ~/tmp/mouse_hs1940/mouse_hs1940.anno.txt -gk -no-check
+GEMMA 0.98-beta1 (2018-09-06) by Xiang Zhou and team (C) 2012-2018
+Reading Files ...
+## number of total individuals = 1940
+## number of analyzed individuals = 1410
+## number of covariates = 1
+## number of phenotypes = 1
+## number of total SNPs/var        =    12226
+## number of analyzed SNPs         =    10768
+Calculating Relatedness Matrix ...
+================================================== 100%
+
+real    0m16.875s
+user    0m25.180s
+sys     0m1.740s
+#+END_SRC
+
+#+BEGIN_SRC bash
+lario:~/izip/git/opensource/genenetwork/gemma$ time bin/gemma -g ~/tmp/mouse_hs1940/mouse_hs1940.geno.txt.gz -p ~/tmp/mouse_hs1940/mouse_hs1940.pheno.txt -n 1 -a ~/tmp/mouse_hs1940/mouse_hs1940.anno.txt -k ./output/result.cXX.txt -lmm -no-check
+GEMMA 0.98-beta1 (2018-09-06) by Xiang Zhou and team (C) 2012-2018
+Reading Files ...
+## number of total individuals = 1940
+## number of analyzed individuals = 1410
+## number of covariates = 1
+## number of phenotypes = 1
+## number of total SNPs/var        =    12226
+## number of analyzed SNPs         =    10768
+Start Eigen-Decomposition...
+pve estimate =0.608801
+se(pve) =0.032774
+================================================== 100%
+
+real    0m13.255s
+user    0m18.272s
+sys     0m3.324s
+#+END_SRC
+
 * GEMMA 0.98-pre
 
 #+BEGIN_SRC bash