42 files changed, 2547 insertions, 3462 deletions
diff --git a/.gitignore b/.gitignore
index ef520c8..059da73 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.o
 *.tar.gz
 src/Eigen
+src/version.h
 example/output
 test/output
 ./output
diff --git a/.travis.yml b/.travis.yml
index ec2d049..ffd674f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,46 +1,52 @@
 language: C++
-compiler: gcc
 matrix:
+  # OSX testing is under development
+  # allow_failures:
+  #   - os: osx
   include:
     - os: linux
+      compiler: gcc
       addons:
         apt:
           sources:
             - ubuntu-toolchain-r-test
           packages:
+            # Our dev environment is a more recent GNU C++ and GSL2
             - g++-4.9
+            - libopenblas-dev 
+            - zlib1g-dev
+            - libeigen3-dev
+            - libgsl0-dev
+            - liblapack-dev
+            # - gfortran-dev for static
       env:
-         - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9"
-    - os: linux
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - g++-6
+         - MATRIX_EVAL="CC=gcc-4.9 && CXX=g++-4.9 && EIGEN_INCLUDE_PATH=/usr/include/eigen3"
+    - os: osx
+      compiler: clang
       env:
-        - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
+         - MATRIX_EVAL="EIGEN_INCLUDE_PATH=/usr/local/include/eigen3"
+#     - os: linux
+#       addons:
+#         apt:
+#           sources:
+#             - ubuntu-toolchain-r-test
+#           packages:
+#             - g++-6
+#       env:
+#         - MATRIX_EVAL="CC=gcc-6 && CXX=g++-6"
 before_install:
-  - sudo apt-get -qq update
-  - sudo apt-get install -y libopenblas-dev zlib1g-dev
-  - sudo apt-get install -y libeigen3-dev
-  - sudo apt-get install -y libgsl0-dev
-  - sudo apt-get install -y liblapack-dev
-  # for the static release version we need the following
-  # - sudo apt-get install -y gfortran-dev
-  - dpkg -l
-  - eval "${MATRIX_EVAL}"
-  - $CXX --version
+  - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew cask uninstall oclint && brew update && brew install gsl openblas zlib eigen lapack ; fi
 script:
+  - echo $MATRIX_EVAL
   - eval "${MATRIX_EVAL}"
   - $CXX --version
   # build and test debug version
-  - make CXX=$CXX WITH_OPENBLAS=1 -j 4
-  - time make CXX=$CXX WITH_OPENBLAS=1 check
-  - make clean
-  # build and test release version
-  - make CXX=$CXX FORCE_DYNAMIC=1 WITH_OPENBLAS=1 -j 4
-  - time make CXX=$CXX WITH_OPENBLAS=1 DEBUG= check
+  - make      CXX=$CXX EIGEN_INCLUDE_PATH=$EIGEN_INCLUDE_PATH WITH_LAPACK=1 OPENBLAS_LEGACY=1 WITH_GSLCBLAS=1 -j 4 -k
+  - time make CXX=$CXX EIGEN_INCLUDE_PATH=$EIGEN_INCLUDE_PATH WITH_LAPACK=1 OPENBLAS_LEGACY=1 WITH_GSLCBLAS=1 check
+  # - make clean
+  # build and test release version (integration test mostly)
+  # - make CXX=$CXX EIGEN_INCLUDE_PATH=$EIGEN_INCLUDE_PATH DEBUG= FORCE_DYNAMIC=1 WITH_OPENBLAS=1 OPENBLAS_LEGACY=1 -j 4
+  # - time make CXX=$CXX DEBUG= WITH_OPENBLAS=1 fast-check
   # build static release (fast-check only)
   # - make clean
   # - make CXX=$CXX TRAVIS_CI=1 -j 4 fast-check
diff --git a/INSTALL.md b/INSTALL.md
index e450a2a..7778ca8 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -14,7 +14,7 @@ GEMMA runs on Linux and MAC OSX and the runtime has the following
 dependencies:
 
 * C++ tool chain >= 4.9
-* GNU Science library (GSL) 1.x (GEMMA does not currently work with GSL >= 2).
+* GNU Science library (GSL) 1.x (note that 2.x is not yet supported)
 * blas/openblas
 * lapack
 * [Eigen3 library](http://eigen.tuxfamily.org/dox/)
@@ -65,12 +65,12 @@ if you get an Eigen error you may need to override the include
 path. E.g. to build GEMMA on GNU Guix with shared libs the following
 may work
 
-    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1
+    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3
 
 another example overriding optimization and LIB flags (so as to link
 against gslv1) would be
 
-    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1 GCC_FLAGS="-Wall" LIBS="$HOME/opt/gsl1/lib/libgsl.a $HOME/opt/gsl1/lib/libgslcblas.a -L$HOME/.guix-profile/lib -pthread -llapack -lblas -lz"
+    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 GCC_FLAGS="-Wall -isystem/$HOME/opt/gsl1/include" LIBS="$HOME/opt/gsl1/lib/libgsl.a $HOME/opt/gsl1/lib/libgslcblas.a -L$HOME/.guix-profile/lib -pthread -llapack -lblas -lz"
 
 to run GEMMA tests
 
@@ -86,7 +86,10 @@ You can run gemma in the debugger with, for example
 Note that if you get <optimized out> warnings on inspecting variables you
 should compile with GCC_FLAGS="" to disable optimizations (-O3). E.g.
 
-    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 WITH_OPENBLAS=1 GCC_FLAGS=
+    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 GCC_FLAGS=
+
+If you get older OpenBlas errors you may need to add
+OPENBLAS_LEGACY=1.
 
 Other options, such as compiling with warnings, are listed in the
 Makefile.
@@ -100,3 +103,72 @@ GEMMA includes the shunit2 test framework (version 2.0).
 or
 
     ./run_tests.sh
+
+## Optimizing performance
+
+### OpenBlas
+
+Linking against a built-from-source OpenBlas is a first optimization
+step because it will optimize code for the local architecture (on my
+workstation it easily doubles speed). When you check the output .log
+file of GEMMA after a run, it will tell you how the linked-in OpenBlas
+was compiled.
+
+It is worth checking that you use OpenBlas's lapack and cblas
+interfaces instead of linking against default lapack and gslcblas
+libs.
+
+To link a new version, compile OpenBlas as per
+[instructions](http://www.openblas.net/).  You can start with the
+default:
+
+    make
+
+and/or play with the switches (listed in OpenBlas Makefile.rule)
+
+    make BINARY=64 NO_WARMUP=0 GEMM_MULTITHREAD_THRESHOLD=4 USE_THREAD=1 NO_AFFINITY=0 NO_LAPACK=1 NUM_THREADS=64 NO_SHARED=1
+
+and you should see something like
+
+    OpenBLAS build complete. (BLAS CBLAS LAPACK LAPACKE)
+
+    OS               ... Linux
+    Architecture     ... x86_64
+    BINARY           ... 64bit
+    C compiler       ... GCC  (command line : gcc)
+    Fortran compiler ... GFORTRAN  (command line : gfortran)
+    Library Name     ... libopenblas_haswellp-r0.3.0.dev.a (Multi threaded; Max num-threads is 64)
+
+Note that OpenBlas by default uses a 32-bit integer API which can
+overflow with large matrix sizes. We don't include LAPACK - the
+OpenBlas version gives problems around eigenvalues for some reason.
+
+We now have a static library which you can link using the full path
+with using the GEMMA Makefile:
+
+    time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -lgsl -pthread -lz" -j 4 unittests
+
+Latest (INT64, no gslcblas):
+
+    time env OPENBLAS_NUM_THREADS=4 make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/opt/gsl2/lib/libgsl.a ~/tmp/OpenBLAS/libopenblas_haswellp-r0.3.0.dev.a -pthread -lz  -llapack" OPENBLAS_INCLUDE_PATH=~/tmp/OpenBLAS/ -j 4 fast-check
+
+
+### OpenBlas 64-bit API
+
+<i>Warning: This is work in progress (WIP)</i>
+
+OpenBlas supports a 64-bit API which allows for large matrices. Unfortunately
+GEMMA does not support it yet, see https://github.com/genetics-statistics/GEMMA/issues/120
+
+For testing we can build
+
+    make BINARY=64 INTERFACE64=1 NO_WARMUP=1 USE_THREAD=0 NO_LAPACK=0 NO_SHARED=1 -j 4
+
+This builds a 64-bit binary and API and no external LAPACK. This is a very conservative
+setting for testing the 64-bit API.
+
+Note, for performance we want a 64-bit binary with threading.
+
+    make EIGEN_INCLUDE_PATH=~/.guix-profile/include/eigen3 LIBS="~/opt/gsl2/lib/libgsl.a ~/tmp/OpenBLAS/libopenblas_haswell-r0.3.0.dev.a ~/.guix-profile/lib/libgfortran.a ~/.guix-profile/lib/libquadmath.a -pthread -lz" OPENBLAS_INCLUDE_PATH=~/tmp/OpenBLAS/ -j 4 fast-check
+
+Note we don't include standard lapack, because it is 32-bits.
diff --git a/Makefile b/Makefile
index 176dd2c..f8bacbf 100644
--- a/Makefile
+++ b/Makefile
@@ -10,13 +10,13 @@
 #
 # Examples:
 #
-#    Make GEMMA on Linux with OPENBLAS support:
+#    Make GEMMA on Linux without OPENBLAS support:
 #
-#      make WITH_OPENBLAS=1
+#      make WITH_OPENBLAS=
 #
 #    Disable debug info and checks (slightly faster release mode)
 #
-#      make WITH_OPENBLAS=1 DEBUG=
+#      make DEBUG=
 #
 #    Force static compilation
 #
@@ -26,22 +26,35 @@
 #
 #      make check
 #
+#    Run quick (development) tests with
+#
+#      make fast-check
+#
+#    Run full (lengthy) tests with
+#
+#      make check-all
+#
 #    See also the INSTALL.md document in the source tree at
 #
 #      https://github.com/genetics-statistics/GEMMA/blob/master/INSTALL.md
 
+GEMMA_VERSION = $(shell cat ./VERSION)
+
 # Set this variable to either LNX or MAC
 SYS                    = LNX # LNX|MAC (Linux is the default)
 # Leave blank after "=" to disable; put "= 1" to enable
-DIST_NAME              = gemma-0.97.3
-DEBUG                  = 1   # DEBUG mode, set DEBUG=0 for a release
+DIST_NAME              = gemma-$(GEMMA_VERSION)
+DEBUG                  = 1                # DEBUG mode, set DEBUG=0 for a release
 SHOW_COMPILER_WARNINGS =
-WITH_LAPACK            = 1
-WITH_OPENBLAS          =     # Defaults to LAPACK - OPENBLAS may be faster
-FORCE_STATIC           =     # Static linking of libraries
-GCC_FLAGS              = -O3 # extra flags -Wl,--allow-multiple-definition
-TRAVIS_CI              =     # used by TRAVIS for testing
-EIGEN_INCLUDE_PATH=/usr/include/eigen3
+WITH_OPENBLAS          = 1                # Without OpenBlas uses LAPACK
+WITH_LAPACK            =                  # Force linking LAPACK (if OpenBlas lacks it)
+WITH_GSLCBLAS          =                  # Force linking gslcblas (if OpenBlas lacks it)
+OPENBLAS_LEGACY        =                  # Using older OpenBlas
+FORCE_STATIC           =                  # Static linking of libraries
+GCC_FLAGS              = -Wall -O3 -std=gnu++11 # extra flags -Wl,--allow-multiple-definition
+TRAVIS_CI              =                  # used by TRAVIS for testing
+EIGEN_INCLUDE_PATH     = /usr/include/eigen3
+OPENBLAS_INCLUDE_PATH  = /usr/local/opt/openblas/include
 
 # --------------------------------------------------------------------
 # Edit below this line with caution
@@ -58,15 +71,26 @@ else
   CPP = g++
 endif
 
-ifdef OPENBLAS
-  WITH_LAPACK =  # OPENBLAS usually includes LAPACK
+ifeq ($(CPP), clang++)
+  # macOS Homebrew settings (as used on Travis-CI)
+  GCC_FLAGS=-O3 -std=c++11 -stdlib=libc++ -isystem/$(OPENBLAS_INCLUDE_PATH) -isystem//usr/local/include/eigen3 -Wl,-L/usr/local/opt/openblas/lib
+endif
+
+ifdef WITH_OPENBLAS
+  OPENBLAS=1
+  # WITH_LAPACK =  # OPENBLAS usually includes LAPACK
+  CPPFLAGS += -DOPENBLAS -isystem/$(OPENBLAS_INCLUDE_PATH)
+  ifdef OPENBLAS_LEGACY
+    # Legacy version (mostly for Travis-CI)
+    CPPFLAGS += -DOPENBLAS_LEGACY
+  endif
 endif
 
 ifdef DEBUG
-  CPPFLAGS = -g $(GCC_FLAGS) -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+  CPPFLAGS += -g $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
 else
   # release mode
-  CPPFLAGS = -DNDEBUG $(GCC_FLAGS) -std=gnu++11 -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
+  CPPFLAGS += -DNDEBUG $(GCC_FLAGS) -isystem/$(EIGEN_INCLUDE_PATH) -Icontrib/catch-1.9.7 -Isrc
 endif
 
 ifdef SHOW_COMPILER_WARNINGS
@@ -74,27 +98,30 @@ ifdef SHOW_COMPILER_WARNINGS
 endif
 
 ifndef FORCE_STATIC
-  LIBS = -lgsl -lgslcblas -pthread -lz
+  LIBS = -lgsl -lopenblas -pthread -lz
+  ifdef WITH_GSLCBLAS
+    LIBS += -lgslcblas
+  else
+    LIBS += -lgfortran -lquadmath
+  endif
 else
   ifndef TRAVIS_CI # Travis static compile we cheat a little
     CPPFLAGS += -static
   endif
 endif
 
-OUTPUT = $(BIN_DIR)/gemma
-
-SOURCES = $(SRC_DIR)/main.cpp
+.PHONY: all
 
-HDR =
+OUTPUT = $(BIN_DIR)/gemma
 
 # Detailed libary paths, D for dynamic and S for static
 
-LIBS_LNX_D_LAPACK = -llapack
-LIBS_LNX_D_BLAS = -lblas
-LIBS_LNX_D_OPENBLAS = -lopenblas
+ifdef WITH_LAPACK
+  LIBS_LNX_D_LAPACK = -llapack
+endif
 LIBS_MAC_D_LAPACK = -framework Accelerate
 # LIBS_LNX_S_LAPACK = /usr/lib/libgsl.a  /usr/lib/libgslcblas.a /usr/lib/lapack/liblapack.a -lz
-LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran  /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition
+# LIBS_LNX_S_LAPACK = /usr/lib/lapack/liblapack.a -lgfortran  /usr/lib/atlas-base/libatlas.a /usr/lib/libblas/libblas.a -Wl,--allow-multiple-definition
 
 ifdef WITH_LAPACK
   ifeq ($(SYS), MAC)
@@ -102,7 +129,7 @@ ifdef WITH_LAPACK
   else
     ifndef FORCE_STATIC
       ifdef WITH_OPENBLAS
-        LIBS += $(LIBS_LNX_D_OPENBLAS)
+        LIBS += -lopenblas
       else
         LIBS += $(LIBS_LNX_D_BLAS)
       endif
@@ -113,7 +140,7 @@ ifdef WITH_LAPACK
   endif
 endif
 
-HDR          = $(wildcard src/*.h)
+HDR          = $(wildcard src/*.h) ./src/version.h
 SOURCES      = $(wildcard src/*.cpp)
 
 # all
@@ -121,17 +148,20 @@ OBJS = $(SOURCES:.cpp=.o)
 
 all: $(OUTPUT)
 
+./src/version.h:
+	./scripts/gen_version_info.sh > src/version.h
+
 $(OUTPUT): $(OBJS)
 	$(CPP) $(CPPFLAGS) $(OBJS) $(LIBS) -o $(OUTPUT)
 
-$(OBJS)  : $(HDR)
+$(OBJS): $(HDR)
 
-.cpp.o:
-	$(CPP) $(CPPFLAGS) $(HEADERS) -c $*.cpp -o $*.o
 .SUFFIXES : .cpp .c .o $(SUFFIXES)
 
-unittests: all contrib/catch-1.9.7/catch.hpp $(TEST_SRC_DIR)/unittests-main.o $(TEST_SRC_DIR)/unittests-math.o
+./bin/unittests-gemma: contrib/catch-1.9.7/catch.hpp $(TEST_SRC_DIR)/unittests-main.o $(TEST_SRC_DIR)/unittests-math.o $(OBJS)
 	$(CPP) $(CPPFLAGS) $(TEST_SRC_DIR)/unittests-main.o  $(TEST_SRC_DIR)/unittests-math.o $(filter-out src/main.o, $(OBJS)) $(LIBS) -o ./bin/unittests-gemma
+
+unittests: ./bin/unittests-gemma
 	./bin/unittests-gemma
 
 fast-check: all unittests
@@ -154,16 +184,18 @@ check: fast-check slow-check
 check-all: check lengthy-check
 
 clean:
+	rm $(SRC_DIR)/version.h
 	rm -vf $(SRC_DIR)/*.o
 	rm -vf $(SRC_DIR)/*~
 	rm -vf $(TEST_SRC_DIR)/*.o
 	rm -vf $(OUTPUT)
 	rm -vf ./bin/unittests-gemma
 
-DIST_COMMON = COPYING.txt README.txt Makefile
+DIST_COMMON = *.md LICENSE VERSION Makefile
 DIST_SUBDIRS = src doc example bin
 
-tar:
+tar: version all
+	@echo "Creating $(DIST_NAME)"
 	mkdir -p ./$(DIST_NAME)
 	cp $(DIST_COMMON) ./$(DIST_NAME)/
 	cp -r $(DIST_SUBDIRS) ./$(DIST_NAME)/
diff --git a/Makefile.macosx b/Makefile.macosx
index d2c1d90..fa7460e 100644
--- a/Makefile.macosx
+++ b/Makefile.macosx
@@ -30,16 +30,19 @@
 #
 #      https://github.com/genetics-statistics/GEMMA/blob/master/INSTALL.md
 
+GEMMA_VERSION = $(shell cat ./VERSION)
+
 # Set this variable to either LNX or MAC
 SYS                    = MAC # LNX|MAC (Linux is the default)
 # Leave blank after "=" to disable; put "= 1" to enable
-DIST_NAME              = gemma-0.97.2
+DIST_NAME              = gemma-$(GEMMA_VERSION)
 DEBUG                  =     # DEBUG mode, set DEBUG= for a release
 SHOW_COMPILER_WARNINGS =
 WITH_LAPACK            = 1
 WITH_OPENBLAS          =     # Defaults to LAPACK - OPENBLAS may be faster
 FORCE_STATIC           =     # Static linking of libraries
-GCC_FLAGS              = -O3 -I/usr/local/Cellar/gsl/2.4/include -I./eigen
+GCC_FLAGS              = -O3 -I/usr/local/Cellar/gsl/2.4/include -I./eigen \
+                         -I/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers
 TRAVIS_CI              =     # used by TRAVIS for testing
 EIGEN_INCLUDE_PATH=
 CXX = g++
@@ -86,7 +89,7 @@ LIBS += -framework Accelerate \
   /usr/local/Cellar/gsl/2.4/lib/libgsl.a \
   /usr/local/Cellar/gsl/2.4/lib/libgslcblas.a
 
-HDR          = $(wildcard src/*.h)
+HDR          = $(wildcard src/*.h) ./src/version.h
 SOURCES      = $(wildcard src/*.cpp)
 
 # all
@@ -94,6 +97,9 @@ OBJS = $(SOURCES:.cpp=.o)
 
 all: $(OUTPUT)
 
+./src/version.h:
+	./scripts/gen_version_info.sh > src/version.h
+
 <print-% : ; @echo $* = $($*)
 
 $(OUTPUT): $(OBJS)
@@ -129,6 +135,7 @@ check: fast-check slow-check
 check-all: check lengthy-check
 
 clean:
+	rm -vf $(SRC_DIR)/version.h
 	rm -vf $(SRC_DIR)/*.o
 	rm -vf $(SRC_DIR)/*~
 	rm -vf $(TEST_SRC_DIR)/*.o
diff --git a/NEWS.md b/NEWS.md
deleted file mode 100644
index 19d81d9..0000000
--- a/NEWS.md
+++ /dev/null
@@ -1,45 +0,0 @@
-## GEMMA 0.96.0
-
-+ First stable release.
-
-## GEMMA 0.95.2
-
-+ Resolved Issue #36.
-
-## GEMMA 0.95.1
-
-+ Created first release of GEMMA 0.95a following request in Issue #33.
-
-## GEMMA 0.94.1
-
-+ Fixed a bug (the predict option for multiple phenotype imputation
-was not recoginzed with PLINK files).
-
-## GEMMA 0.94.0
-
-+ Implemented the multivariate linear mixed model.
-
-## GEMMA 0.93
-
-+ Implemented the Bayesian sparse linear mixed model.
-
-## GEMMA 0.92
-
-+ Fixed a few typos.
-
-+ Now allows for missing values in the covariates file.
-
-+ Included REMLE estimate for lambda in the output .log file.
-
-+ Added small GWAS example dataset
-
-+ Added detailed user manual.
-
-## GEMMA 0.91
-
-+ Fixed a bug (BIMBAM annotation file not recognized).
-
-## GEMMA 0.90
-
-+ Initial pre-release.
-
diff --git a/README.md b/README.md
index 7712107..355a3a9 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,8 @@ also encourage contributions, for example, by forking the repository,
 making your changes to the code, and issuing a pull request.
 
 Currently, GEMMA is supported for 64-bit Mac OS X and Linux
-platforms. *Windows is not currently supported.* If you are interested
+platforms. *Windows is not currently supported.* though you can
+run GEMMA in a Linux VM or [container](https://docs.docker.com/docker-for-windows/). If you are interested
 in helping to make GEMMA available on Windows platforms (e.g., by
 providing installation instructions for Windows, or by contributing
 Windows binaries) please post a note in the
@@ -52,13 +53,46 @@ algorithm can be used to estimate variance components when
 individual-level data are available. For summary data, GEMMA uses the
 MQS algorithm to estimate variance components.
 
-## Quick start
+## Installation
+
+To install GEMMA you can
+
+1. Download the precompiled binaries (64-bit Linux and Mac only)
+
+2. Use existing package managers, see [INSTALL.md](INSTALL.md).
+
+3. Compile GEMMA from source, see [INSTALL.md](INSTALL.md).
+
+Compiling from source takes more work, but can potentially boost
+performance of GEMMA when using specialized C++ compilers and
+numerical libraries.
+
+### Precompiled binaries
+
+1. Fetch the [latest stable release][latest_release] and download the
+   file appropriate for your platform.
+
+2. For .tar.bz2 files unpack the tar ball
+
+        tar xvjf gemma-$version-installer.tar.bz2
+
+    run the installer
+
+        ./install.sh ~/gemma
+
+    and run gemma
+
+        ~/gemma/bin/gemma
+
+3. For .gz files run `gunzip gemma.linux.gz` or `gunzip
+gemma.linux.gz` to unpack the file.
+
 
-1. Download and install the software. See [INSTALL.md](INSTALL.md).
+## Quick start
 
-2. Work through the demo. *Give more details here.*
+1. Work through the demo. *Give more details here.*
 
-3. Read the manual and run `gemma -h`. *Give more details here.*
+2. Read the manual and run `gemma -h`. *Give more details here.*
 
 ## Citing GEMMA
 
@@ -92,7 +126,7 @@ studies.](https://doi.org/10.1101/042846) *Annals of Applied Statistics*, in pre
 
 ## License
 
-Copyright (C) 2012–2017, Xiang Zhou.
+Copyright (C) 2012–2017, Xiang Zhou and team.
 
 The *GEMMA* source code repository is free software: you can
 redistribute it under the terms of the
@@ -103,10 +137,10 @@ warranty**; without even the implied warranty of **merchantability or
 fitness for a particular purpose**. See file [LICENSE](LICENSE) for
 the full text of the license.
 
-The source code for the
-[shUnit2](https://github.com/genenetwork/shunit2) unit testing
-framework, included in this repository [here](contrib/shunit2-2.0.3), is
-distributed under the
+Both the source code for the
+[gzstream zlib wrapper](http://www.cs.unc.edu/Research/compgeom/gzstream/)
+and [shUnit2](https://github.com/genenetwork/shunit2) unit testing
+framework included in GEMMA are distributed under the
 [GNU Lesser General Public License](contrib/shunit2-2.0.3/doc/LGPL-2.1),
 either version 2.1 of the License, or (at your option) any later
 revision.
@@ -115,64 +149,59 @@ The source code for the included [Catch](http://catch-lib.net) unit
 testing framework is distributed under the
 [Boost Software Licence version 1](https://github.com/philsquared/Catch/blob/master/LICENSE.txt).
 
-## What's included
-
-This is the current structure of the GEMMA source repository:
-
-```
-├── LICENSE
-├── Makefile
-├── NEWS.md
-├── README.md
-├── bin
-├── doc
-├── example
-└── src
-```
+### Optimizing performance
 
-*Write a paragraph here briefly explaining what is in each of the
-subfolders; see Wilson et al "Good Enough Practices" paper for example
-of this.*
-
-## Setup
-
-To install GEMMA you can
-
-1. Download the precompiled binaries (64-bit Linux and Mac only), see
-   [latest stable release][latest_release].
-
-2. Use existing package managers, see [INSTALL.md](INSTALL.md).
-
-3. Compile GEMMA from source, see [INSTALL.md](INSTALL.md).
-
-Compiling from source takes more work, but can boost performance of
-GEMMA when using specialized C++ compilers and numerical libraries.
-
-Source code and [latest stable release][latest_release] are available
-from the Github repository.
-
-### Precompiled binaries
-
-1. Fetch the [latest stable release][latest_release] and download the
-file appropriate for your platform: `gemma.linux.gz` for Linux, or
-`gemma.macosx.gz` for Mac OS X.
-
-2. Run `gunzip gemma.linux.gz` or `gunzip gemma.linux.gz` to
-unpack the file.
-
-3. Downloadable binaries are linked to static versions of the GSL,
-LAPACK and BLAS libraries. There is no need to install these
-libraries.
+Precompiled binaries and libraries may not be optimal for your particular
+hardware. See [INSTALL.md](INSTALL.md) for speeding up tips.
 
 ### Building from source
 
-*Note that GEMMA currently does not work with GSL 2.x. We recommend
-linking to the latest version of GSL 1.x, which is GSL 1.16 as of this
-writing.*
-
 More information on source code, dependencies and installation can be
 found in [INSTALL.md](INSTALL.md).
 
+## Reporting a GEMMA bug or issue
+
+For bugs GEMMA has an
+[issue tracker](https://github.com/genetics-statistics/GEMMA/issues)
+on github. For general support GEMMA has a mailing list at
+[gemma-discussion](https://groups.google.com/forum/#!forum/gemma-discussion)
+
+Before posting an issue search the issue tracker and mailing list
+first. It is likely someone may have encountered something
+similiar. Also try running the latest version of GEMMA to make sure it
+has not been fixed already. Support/installation questions should be
+aimed at the mailing list. The issue tracker is for development issues
+around the software itself. When reporting an issue include the output
+of the program and the contents of the .log.txt file in the output
+directory.
+
+### Check list:
+
+1. [X] I have found and issue with GEMMA
+2. [ ] I have searched for it on the [issue tracker](https://github.com/genetics-statistics/GEMMA/issues?q=is%3Aissue) (incl. closed issues)
+3. [ ] I have searched for it on the [mailing list](https://groups.google.com/forum/#!forum/gemma-discussion)
+4. [ ] I have tried the latest [release](https://github.com/genetics-statistics/GEMMA/releases) of GEMMA
+5. [ ] I have read and agreed to below code of conduct
+6. [ ] If it is a support/install question I have posted it to the [mailing list](https://groups.google.com/forum/#!forum/gemma-discussion)
+7. [ ] If it is software development related I have posted a new issue on the [issue tracker](https://github.com/genetics-statistics/GEMMA/issues) or added to an existing one
+8. [ ] In the message I have included the output of my GEMMA run
+9. [ ] In the message I have included the relevant .log.txt file in the output directory
+10. [ ] I have made available the data to reproduce the problem (optional)
+
+To find bugs the GEMMA software developers may ask to install a
+development version of the software. They may also ask you for your
+data and will treat it confidentially.  Please always remember that
+GEMMA is written and maintained by volunteers with good
+intentions. Our time is valuable too. By helping us as much as
+possible we can provide this tool for everyone to use.
+
+## Code of conduct
+
+By using GEMMA and communicating with its communtity you implicitely
+agree to abide by the
+[code of conduct](https://software-carpentry.org/conduct/) as
+published by the Software Carpentry initiative.
+
 ## Credits
 
 The *GEMMA* software was developed by:
@@ -182,7 +211,8 @@ Dept. of Biostatistics<br>
 University of Michigan<br>
 2012-2017
 
-Peter Carbonetto, Tim Flutre, Matthew Stephens, Pjotr Prins and others
-have also contributed to the development of this software.
+Peter Carbonetto, Tim Flutre, Matthew Stephens,
+[Pjotr Prins](http://thebird.nl/) and others have also contributed to
+the development of this software.
 
 [latest_release]: https://github.com/genetics-statistics/GEMMA/releases "Most recent stable releases"
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
new file mode 100644
index 0000000..de7f3b5
--- /dev/null
+++ b/RELEASE-NOTES.md
@@ -0,0 +1,101 @@
+## ChangeLog v0.97 (2017/12/19)
+
+This is a massive bug fix release with many improvements. For contributions
+see
+[contributors](https://github.com/genetics-statistics/GEMMA/graphs/contributors)
+and
+[commits](https://github.com/genetics-statistics/GEMMA/commits/master).
+
+### Speedup of GEMMA by using optimized OpenBlas
+
+* Providing a binary release with OpenBlas optimization for Intel Haswell
+* Dropped using standar lapack and gslcblas libs
+* Fixed NaN bug with GSL2 and made recent libraries the default
+* Minimized use of Eigenlib libraries (single threaded and slow compilation)
+* -legacy switch provides v0.96 behaviour (incl. eigenlib)
+
+### Added Leave One Chromosome Out (LOCO) support for Bimbam (K and LMM)
+
+* See 449d882a3b33ef81ef4f0127c3932b01fa796dbb
+* -snps [filename] option allow selecting a subset of SNPs for analysis
+* -loco [chr] option for K and LMM computations
+* added [gemma-wrapper](https://github.com/genetics-statistics/gemma-wrapper) to make using LOCO easy
+* LOCO examples in https://github.com/genetics-statistics/GEMMA/blob/master/test/dev_test_suite.sh
+
+### Added checks for matrices
+
+* #72 and #45 implements
+  1. Fail if K has negative eigen values
+  2. Fail if K is not symmetric
+  3. Fail if K is not positive definite
+  4. Warn in eigen values are very small
+  5. Warn if K is ill conditioned
+* Check for NaN values
+
+### Added test framework and unit tests
+
+* Added integration and unit tests, as well as
+  [Travis-CI](https://travis-ci.org/genenetwork/GEMMA) support
+* Improved debug information and testing of input files
+
+### Other
+
+* #81 printing out beta and se(beta) under -lmm 2 as well as logl_H1
+* Improved README and INSTALL docs
+* Added support info and code of conduct
+* Reformatted the full source tree with 3935ba39d30666dd7d4a831155631847c77b70c4
+* Merged LMM computation for Plink and Bimbam formats
+* Fixed progressbar issues
+* #46 removed support for Oxford format
+* Got rid of all compiler warnings
+* Updated copyright banner, info and license information for included software
+* Started a [discussion list](https://groups.google.com/forum/#!forum/gemma-discussion)
+
+See also [commits](https://github.com/genetics-statistics/GEMMA/commits/master).
+
+## GEMMA 0.96.0
+
++ First stable release.
+
+## GEMMA 0.95.2
+
++ Resolved Issue #36.
+
+## GEMMA 0.95.1
+
++ Created first release of GEMMA 0.95a following request in Issue #33.
+
+## GEMMA 0.94.1
+
++ Fixed a bug (the predict option for multiple phenotype imputation
+was not recoginzed with PLINK files).
+
+## GEMMA 0.94.0
+
++ Implemented the multivariate linear mixed model.
+
+## GEMMA 0.93
+
++ Implemented the Bayesian sparse linear mixed model.
+
+## GEMMA 0.92
+
++ Fixed a few typos.
+
++ Now allows for missing values in the covariates file.
+
++ Included REMLE estimate for lambda in the output .log file.
+
++ Added small GWAS example dataset
+
++ Added detailed user manual.
+
+## GEMMA 0.91
+
++ Fixed a bug (BIMBAM annotation file not recognized).
+
+## GEMMA 0.90
+
++ Initial pre-release.
+
+See also https://github.com/genetics-statistics/GEMMA/releases
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..ea6844b
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.97
diff --git a/scripts/gen_version_info.sh b/scripts/gen_version_info.sh
new file mode 100755
index 0000000..8a9e38d
--- /dev/null
+++ b/scripts/gen_version_info.sh
@@ -0,0 +1,12 @@
+#! /bin/bash
+#
+# Script to generate the version info of GEMMA and its environment
+# in ./src/version.h
+
+DATE=$(date "+%Y/%m/%d")
+YEAR=$(date "+%Y")
+
+echo // version.h generated by GEMMA $0
+echo \#define GEMMA_VERSION \"$(cat ./VERSION)\"
+echo \#define GEMMA_DATE \"$DATE\"
+echo \#define GEMMA_YEAR \"$YEAR\"
diff --git a/src/bslmmdap.cpp b/src/bslmmdap.cpp
index 7aac1d4..e9900e3 100644
--- a/src/bslmmdap.cpp
+++ b/src/bslmmdap.cpp
@@ -116,16 +116,16 @@ void ReadFile_hyb(const string &file_hyp, vector<double> &vec_sa2,
   getline(infile, line);
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     vec_sa2.push_back(atof(ch_ptr));
 
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     vec_sb2.push_back(atof(ch_ptr));
 
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     vec_wab.push_back(atof(ch_ptr));
   }
 
@@ -152,7 +152,7 @@ void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
   vector<vector<double>> mat_bf;
   char *ch_ptr;
 
-  size_t bf_size, flag_block;
+  size_t bf_size = 0, flag_block;
 
   getline(infile, line);
 
@@ -160,11 +160,11 @@ void ReadFile_bf(const string &file_bf, vector<string> &vec_rs,
   while (!safeGetline(infile, line).eof()) {
     flag_block = 0;
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
     vec_rs.push_back(rs);
 
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     if (t == 0) {
       block = ch_ptr;
     } else {
@@ -223,7 +223,7 @@ void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   // Use the header to determine the number of categories.
@@ -238,7 +238,7 @@ void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
 
   // Read the following lines to record mapRS2cat.
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     if (header.rs_col == 0) {
       rs = chr + ":" + pos;
@@ -248,6 +248,7 @@ void ReadFile_cat(const string &file_cat, const vector<string> &vec_rs,
     catd.clear();
 
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       } else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -658,13 +659,13 @@ void single_ct_regression(const gsl_matrix_int *Xd,
     sum_pip[i] = sum[i] = 0;
   }
 
-  for (int i = 0; i < Xd->size1; i++) {
+  for (size_t i = 0; i < Xd->size1; i++) {
     int cat = gsl_matrix_int_get(Xd, i, 0);
     sum_pip[cat] += gsl_vector_get(pip_vec, i);
     sum[cat] += 1;
   }
 
-  for (int i = 0; i < Xd->size1; i++) {
+  for (size_t i = 0; i < Xd->size1; i++) {
     int cat = gsl_matrix_int_get(Xd, i, 0);
     gsl_vector_set(prior_vec, i, sum_pip[cat] / sum[cat]);
   }
@@ -683,10 +684,10 @@ void BSLMMDAP::DAP_EstimateHyper(
     const vector<double> &vec_sa2, const vector<double> &vec_sb2,
     const vector<double> &wab, const vector<vector<vector<double>>> &BF,
     gsl_matrix *Ac, gsl_matrix_int *Ad, gsl_vector_int *dlevel) {
-  clock_t time_start;
+  // clock_t time_start;
 
   // Set up BF.
-  double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save;
+  double h, rho, sigma_a2, sigma_b2, d, s, logm, logm_save = nan("");
   size_t t1, t2;
   size_t n_grid = wab.size(), ns_test = vec_rs.size();
 
diff --git a/src/debug.cpp b/src/debug.cpp
index 0d3c9cc..fd94f1e 100644
--- a/src/debug.cpp
+++ b/src/debug.cpp
@@ -1,3 +1,22 @@
+/*
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
 
 #include <cmath>
 #include <cstring>
@@ -18,28 +37,140 @@
 #include "debug.h"
 #include "mathfunc.h"
 
-// Helper function called by macro validate_K(K, check)
-void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line) {
-  if (do_check) {
+static bool debug_mode     = false;
+static bool debug_check    = true;  // check data/algorithms
+static bool debug_strict   = false; // fail on error, more rigorous checks
+static bool debug_quiet    = false;
+static uint debug_issue    = 0;     // track github issues
+static bool debug_legacy   = false; // legacy mode
+
+void debug_set_debug_mode(bool setting) { debug_mode = setting; }
+void debug_set_no_check_mode(bool setting) {debug_check = !setting; }
+void debug_set_strict_mode(bool setting) { debug_strict = setting; }
+void debug_set_quiet_mode(bool setting) { debug_quiet = setting; }
+void debug_set_issue(uint issue) { debug_issue = issue; }
+void debug_set_legacy_mode(bool setting) { debug_legacy = setting; }
+
+bool is_debug_mode() { return debug_mode; };
+bool is_no_check_mode() { return !debug_check; };
+bool is_check_mode() { return debug_check; };
+bool is_strict_mode() { return debug_strict; };
+bool is_quiet_mode() { return debug_quiet; };
+bool is_issue(uint issue) { return issue == debug_issue; };
+bool is_legacy_mode() { return debug_legacy; };
+
+
+/*
+  Helper function to make sure gsl allocations do their job because
+  gsl_matrix_alloc does not initiatize values (behaviour that changed
+  in GSL2) we introduced a 'strict mode' by initializing the buffer
+  with NaNs. This happens when NO-CHECKS is not set (default) and with
+  DEBUG (i.e. -debug option).
+*/
+gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols) {
+  gsl_matrix *m = gsl_matrix_alloc(rows,cols);
+  enforce_msg(m,"Not enough memory"); // just to be sure when there is no error handler set
+  if (is_check_mode() && is_debug_mode()) {
+    gsl_matrix_set_all(m, nan(""));
+  }
+  return m;
+}
+
+int gsl_matrix_safe_memcpy (gsl_matrix *dest, const gsl_matrix *src) {
+  enforce(dest->size1 == src->size1);
+  enforce(dest->size2 == src->size2);
+  return gsl_matrix_memcpy(dest,src);
+}
+
+void do_gsl_matrix_safe_free (gsl_matrix *m, const char *__pretty_function, const char *__file, int __line) {
+  enforce(m);
+  if (is_strict_mode() && is_check_mode() && is_debug_mode()) {
+    bool has_NaN = has_nan(m);
+    bool has_Inf = has_inf(m);
+    if (has_NaN || has_Inf) {
+      std::string msg = "Matrix (size ";
+      msg += std::to_string(m->size1);
+      msg += "x";
+      msg += std::to_string(m->size2);
+      msg += ")";
+      if (has_Inf)
+        warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains Infinite on free!").c_str());
+      if (has_NaN)
+        warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains NaN on free!").c_str());
+    }
+  }
+  return gsl_matrix_free(m);
+}
+
+int gsl_vector_safe_memcpy (gsl_vector *dest, const gsl_vector *src) {
+  enforce(dest->size == src->size);
+  return gsl_vector_memcpy(dest,src);
+}
+
+void do_gsl_vector_safe_free (gsl_vector *v, const char *__pretty_function, const char *__file, int __line) {
+  enforce(v);
+  if (is_strict_mode() && is_check_mode() && is_debug_mode()) {
+    bool has_NaN = has_nan(v);
+    bool has_Inf = has_inf(v);
+    if (has_NaN || has_Inf) {
+      std::string msg = "Vector (size ";
+      msg += std::to_string(v->size);
+      msg += ")";
+      if (has_Inf)
+        warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains Infinite on free!").c_str());
+      if (has_NaN)
+        warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,(msg+" contains NaN on free!").c_str());
+    }
+  }
+  return gsl_vector_free(v);
+}
+
+/*
+  Helper function to make sure gsl allocations do their job because
+  gsl_vector_alloc does not initiatize values (behaviour that changed
+  in GSL2) we introduced a 'strict mode' by initializing the buffer
+  with NaNs. This happens when NO-CHECKS is not set and with DEBUG
+  (i.e. -debug option).
+*/
+gsl_vector *gsl_vector_safe_alloc(size_t n) {
+  gsl_vector *v = gsl_vector_alloc(n);
+  enforce_msg(v,"Not enough memory"); // just to be sure when there is no error handler set
+  if (is_check_mode() && is_debug_mode()) {
+    gsl_vector_set_all(v, nan(""));
+  }
+  return v;
+}
+
+char *do_strtok_safe(char *tokenize, const char *delimiters, const char *__pretty_function, const char *__file, int __line) {
+  auto token = strtok(tokenize,delimiters);
+  if (token == NULL && (is_debug_mode() || is_strict_mode()))
+    fail_at_msg(__file,__line,string("strtok failed in ") + __pretty_function);
+  return token;
+}
+
+// Helper function called by macro validate_K(K, check). K is validated
+// unless -no-check option is used.
+void do_validate_K(const gsl_matrix *K, const char *__pretty_function, const char *__file, int __line) {
+  if (is_check_mode()) {
     // debug_msg("Validating K");
     auto eigenvalues = getEigenValues(K);
-    const uint count_small = count_small_values(eigenvalues,EIGEN_MINVALUE);
+    const uint count_small = count_abs_small_values(eigenvalues,EIGEN_MINVALUE);
     if (count_small>1) {
       std::string msg = "K has ";
       msg += std::to_string(count_small);
       msg += " eigenvalues close to zero";
       warning_at_msg(__file,__line,msg);
     }
-    if (!isMatrixIllConditioned(eigenvalues))
+    if (isMatrixIllConditioned(eigenvalues))
       warning_at_msg(__file,__line,"K is ill conditioned!");
     if (!isMatrixSymmetric(K))
-      fail_at_msg(strict,__file,__line,"K is not symmetric!" );
+      warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,"K is not symmetric!" );
     const bool negative_values = has_negative_values_but_one(eigenvalues);
     if (negative_values) {
       warning_at_msg(__file,__line,"K has more than one negative eigenvalues!");
     }
     if (count_small>1 && negative_values && !isMatrixPositiveDefinite(K))
-      fail_at_msg(strict,__file,__line,"K is not positive definite!");
+      warnfail_at_msg(is_strict_mode(),__pretty_function,__file,__line,"K is not positive definite!");
     gsl_vector_free(eigenvalues);
   }
 }
diff --git a/src/debug.h b/src/debug.h
index 06ca5cb..208868e 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -1,3 +1,23 @@
+/*
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
 #ifndef __DEBUG_H__
 #define __DEBUG_H__
 
@@ -10,43 +30,92 @@ void gemma_gsl_error_handler (const char * reason,
                               const char * file,
                               int line, int gsl_errno);
 
+void debug_set_debug_mode(bool setting);
+void debug_set_no_check_mode(bool setting);
+void debug_set_strict_mode(bool setting);
+void debug_set_quiet_mode(bool setting);
+void debug_set_issue(uint issue);
+void debug_set_legacy_mode(bool setting);
+
+bool is_debug_mode();
+bool is_no_check_mode();
+bool is_check_mode();
+bool is_strict_mode();
+bool is_quiet_mode();
+bool is_issue(uint issue);
+bool is_legacy_mode();
+
+#define check_int_mult_overflow(m,n) \
+  { auto x = m * n;                                      \
+    enforce_msg(x / m == n, "multiply integer overflow"); }
+
+gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols);
+int gsl_matrix_safe_memcpy (gsl_matrix *dest, const gsl_matrix *src);
+void gsl_matrix_safe_free (gsl_matrix *v);
+void do_gsl_matrix_safe_free (gsl_matrix *m, const char *__pretty_function, const char *__file, int __line);
+
+gsl_vector *gsl_vector_safe_alloc(size_t n);
+int gsl_vector_safe_memcpy (gsl_vector *dest, const gsl_vector *src);
+void gsl_vector_safe_free (gsl_vector *v);
+void do_gsl_vector_safe_free (gsl_vector *v, const char *__pretty_function, const char *__file, int __line);
+
+char *do_strtok_safe(char *tokenize, const char *delimiters, const char *__pretty_function, const char *__file, int __line);
+#define strtok_safe(string,delimiters) do_strtok_safe(string,delimiters,__SHOW_FUNC,__FILE__,__LINE__)
 
 // Validation routines
-void do_validate_K(const gsl_matrix *K, bool do_check, bool strict, const char *__file, int __line);
+void do_validate_K(const gsl_matrix *K, const char*__pretty_func, const char *__file, int __line);
 
 #define ROUND(f) round(f * 10000.)/10000
-#define validate_K(K,check,strict) do_validate_K(K,check,strict,__FILE__,__LINE__)
+#define validate_K(K) do_validate_K(K,__SHOW_FUNC,__FILE__,__LINE__)
 
 #define warning_at_msg(__file,__line,msg) cerr << "**** WARNING: " << msg << " in " << __file << " at line " << __line << endl;
 
-inline void fail_at_msg(bool strict, const char *__file, int __line, const char *msg) {
+inline void warnfail_at_msg(bool strict, const char *__function, const char *__file, int __line, const char *msg) {
   if (strict)
     std::cerr << "**** STRICT FAIL: ";
   else
     std::cerr << "**** WARNING: ";
-  std::cerr << msg << " in " << __file << " at line " << __line << std::endl;
+  std::cerr << msg << " in " << __file << " at line " << __line << " in " << __function << std::endl;
   if (strict)
     exit(1);
 }
 
+inline void fail_at_msg(const char *__file, int __line, std::string msg) {
+  std::cerr << msg << " in " << __file << " at line " << __line << std::endl;
+  exit(1);
+}
+
 # ifndef __ASSERT_VOID_CAST
 # define __ASSERT_VOID_CAST (void)
 # endif
 
+inline void fail_msg(const char *msg) {
+  std::cerr << "**** FAILED: " << msg << std::endl;
+  exit(5);
+}
+
+inline void fail_msg(std::string msg) {
+  std::cerr << "**** FAILED: " << msg << std::endl;
+  exit(5);
+}
+
 #if defined NDEBUG
+  #define __SHOW_FUNC __func__
 
-#define warning_msg(msg) cerr << "**** WARNING: " << msg << endl;
-#define debug_msg(msg)
-#define assert_issue(is_issue, expr)
+  #define warning_msg(msg) cerr << "**** WARNING: " << msg << endl;
+  #define debug_msg(msg)
+  #define assert_issue(is_issue, expr)
 
 #else // DEBUG
 
-#define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __FUNCTION__ << endl;
-#define debug_msg(msg) cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __FUNCTION__ << endl;
-#define assert_issue(is_issue, expr) \
-  ((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0))
+  #define __SHOW_FUNC __func__
 
-#endif
+  #define warning_msg(msg) cerr << "**** WARNING: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __func__ << endl;
+  #define debug_msg(msg) (is_debug_mode() && cerr << "**** DEBUG: " << msg << " in " << __FILE__ << " at line " << __LINE__ << " in " << __func__ << endl);
+  #define assert_issue(is_issue, expr) \
+    ((is_issue) ? enforce_msg(expr,"FAIL: ISSUE assert") : __ASSERT_VOID_CAST(0))
+
+#endif // NDEBUG
 
 // enforce works like assert but also when NDEBUG is set (i.e., it
 // always works). enforce_msg prints message instead of expr
@@ -56,25 +125,23 @@ inline void __enforce_fail(const char *__assertion, const char *__file,
                     unsigned int __line,
                     const char *__function)
 {
-  std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __PRETTY_FUNCTION__ << std::endl;
+  std::cout << "ERROR: Enforce failed for " << __assertion << " in " << __file << " at line " << __line << " in " << __function << std::endl;
   exit(1);
 }
 
-#define __ASSERT_FUNCTION __PRETTY_FUNCTION__
-
 #define enforce(expr)                                                          \
   ((expr)                                                                      \
        ? __ASSERT_VOID_CAST(0)                                                 \
-       : __enforce_fail(__STRING(expr), __FILE__, __LINE__, __ASSERT_FUNCTION))
+       : __enforce_fail(__STRING(expr), __FILE__, __LINE__, __SHOW_FUNC))
 
 #define enforce_msg(expr, msg)                                                 \
   ((expr) ? __ASSERT_VOID_CAST(0)                                              \
-          : __enforce_fail(msg, __FILE__, __LINE__, __ASSERT_FUNCTION))
+          : __enforce_fail(msg, __FILE__, __LINE__, __SHOW_FUNC))
 
 #define enforce_str(expr, msg)                                                 \
   ((expr)                                                                      \
        ? __ASSERT_VOID_CAST(0)                                                 \
-       : __enforce_fail((msg).c_str(), __FILE__, __LINE__, __ASSERT_FUNCTION))
+       : __enforce_fail((msg).c_str(), __FILE__, __LINE__, __SHOW_FUNC))
 
 // Helpers to create a unique varname per MACRO
 #define COMBINE1(X, Y) X##Y
@@ -85,6 +152,16 @@ inline void __enforce_fail(const char *__assertion, const char *__file,
   (COMBINE(res, __LINE__) == 0                                                 \
        ? __ASSERT_VOID_CAST(0)                                                 \
        : __enforce_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__,         \
-                        __LINE__, __ASSERT_FUNCTION))
+                        __LINE__, __SHOW_FUNC))
+
+#define enforce_fexists(fn, msg)                                               \
+  if (!fn.empty())                                                             \
+    enforce_msg(stat(fn.c_str(), &fileInfo) == 0,                              \
+                ((std::string(__STRING(fn)) + " " + fn + ": " + msg).c_str()));
+
+#define gsl_matrix_safe_free(m) \
+  do_gsl_matrix_safe_free(m,__SHOW_FUNC,__FILE__,__LINE__);
+#define gsl_vector_safe_free(v) \
+  do_gsl_vector_safe_free(v,__SHOW_FUNC,__FILE__,__LINE__);
 
 #endif
diff --git a/src/eigenlib.cpp b/src/eigenlib.cpp
index a8c545c..4d6aacc 100644
--- a/src/eigenlib.cpp
+++ b/src/eigenlib.cpp
@@ -17,16 +17,18 @@
 */
 
 #include "Eigen/Dense"
-#include "gsl/gsl_linalg.h"
+// #include "gsl/gsl_linalg.h"
 #include "gsl/gsl_matrix.h"
-#include "gsl/gsl_vector.h"
+// #include "gsl/gsl_vector.h"
 #include <cmath>
 #include <iostream>
 #include <vector>
+#include <cblas.h>
 
 using namespace std;
 using namespace Eigen;
 
+
 // On two different clusters, compare eigen vs lapack/gsl:
 //
 // dgemm, 5x or 0.5x faster or slower than lapack, 5x or 4x faster than gsl
@@ -57,8 +59,6 @@ void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
       C_mat = alpha * A_mat.transpose() * B_mat.transpose() + beta * C_mat;
     }
   }
-
-  return;
 }
 
 void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
@@ -75,15 +75,12 @@ void eigenlib_dgemv(const char *TransA, const double alpha, const gsl_matrix *A,
   } else {
     y_vec = alpha * A_mat.transpose() * x_vec + beta * y_vec;
   }
-
-  return;
 }
 
 void eigenlib_invert(gsl_matrix *A) {
   Map<Matrix<double, Dynamic, Dynamic, RowMajor>> A_mat(A->data, A->size1,
                                                         A->size2);
   A_mat = A_mat.inverse();
-  return;
 }
 
 void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) {
@@ -92,7 +89,6 @@ void eigenlib_dsyr(const double alpha, const gsl_vector *b, gsl_matrix *A) {
   Map<Matrix<double, Dynamic, 1>, 0, OuterStride<Dynamic>> b_vec(
       b->data, b->size, OuterStride<Dynamic>(b->stride));
   A_mat = alpha * b_vec * b_vec.transpose() + A_mat;
-  return;
 }
 
 void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) {
@@ -108,5 +104,4 @@ void eigenlib_eigensymm(const gsl_matrix *G, gsl_matrix *U, gsl_vector *eval) {
     abort();
   eval_vec = es.eigenvalues();
   U_mat = es.eigenvectors();
-  return;
 }
diff --git a/src/eigenlib.h b/src/eigenlib.h
index b29fa63..7fb69ad 100644
--- a/src/eigenlib.h
+++ b/src/eigenlib.h
@@ -19,9 +19,9 @@
 #ifndef __EIGENLIB_H__
 #define __EIGENLIB_H__
 
-#include <vector>
+// #include <vector>
 
-using namespace std;
+// using namespace std;
 
 void eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
                     const gsl_matrix *A, const gsl_matrix *B, const double beta,
diff --git a/src/fastblas.cpp b/src/fastblas.cpp
new file mode 100644
index 0000000..362027c
--- /dev/null
+++ b/src/fastblas.cpp
@@ -0,0 +1,239 @@
+/*
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "gsl/gsl_matrix.h"
+#include <algorithm>    // std::min
+#include <cmath>
+#include <iomanip>
+#include <vector>
+#include <cblas.h>
+#include "debug.h"
+#include "fastblas.h"
+#include "mathfunc.h"
+#include <string.h>
+#include "eigenlib.h"
+
+using namespace std;
+
+/*
+   Reasonably fast function to copy data from standard C array into
+   gsl_matrix. Avoid it for performance critical sections.
+*/
+gsl_matrix *fast_copy(gsl_matrix *m, const double *mem) {
+  auto rows = m->size1;
+  auto cols = m->size2;
+  if (is_strict_mode()) { // slower correct version
+    for (size_t r=0; r<rows; r++) {
+      for (size_t c=0; c<cols; c++) {
+        gsl_matrix_set(m,r,c,mem[r*cols+c]);
+      }
+    }
+  } else { // faster goes by row
+    auto v = gsl_vector_calloc(cols);
+    enforce(v); // just to be sure
+    for (size_t r=0; r<rows; r++) {
+      assert(v->size == cols);
+      assert(v->block->size == cols);
+      assert(v->stride == 1);
+      memcpy(v->block->data,&mem[r*cols],cols*sizeof(double));
+      gsl_matrix_set_row(m,r,v);
+    }
+    gsl_vector_free(v);
+  }
+  return m;
+}
+
+/*
+    Helper function fast_cblas_dgemm runs the local dgemm
+*/
+void fast_cblas_dgemm(const enum CBLAS_ORDER Order,
+                      const enum CBLAS_TRANSPOSE TransA,
+                      const enum CBLAS_TRANSPOSE TransB,
+                      const size_t M,
+                      const size_t N,
+                      const size_t K,
+                      const double alpha,
+                      const double *A,
+                      const size_t lda,
+                      const double *B,
+                      const size_t ldb,
+                      const double beta,
+                      double *C,
+                      const size_t ldc) {
+#ifndef NDEBUG
+  if (is_debug_mode()) {
+    #ifdef DISABLED
+    size_t i,j;
+    printf (" Top left corner of matrix A: \n");
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(K,6); j++) {
+        printf ("%12.0f", A[j+i*K]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix B: \n");
+    for (i=0; i<min(K,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.0f", B[j+i*N]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix C: \n");
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.5G", C[j+i*N]);
+      }
+      printf ("\n");
+    }
+    #endif
+
+    cout << scientific << setprecision(3) << "* RowMajor " << Order << "\t" ;
+    cout << "transA " << TransA << "\t" ;
+    cout << "transB " << TransB << "\t" ;
+    cout << "m " << M << "\t" ;
+    cout << "n " << N << "\t" ;
+    cout << "k " << K << "\n" ;
+    cout << "* lda " << lda << "\t" ;
+    cout << "ldb " << ldb << "\t" ;
+    cout << "ldc " << ldc << "\t" ;
+    cout << "alpha " << alpha << "\t" ;
+    cout << "beta " << beta << "\n" ;
+    cout << "* A03 " << A[3] << "\t" ;
+    cout << "B03 " << B[3] << "\t" ;
+    cout << "C03 " << C[3] << "\t" ;
+    cout << "Asum " << sum(A,M,K) << "\t" ;
+    cout << "Bsum " << sum(B,K,N) << "\n" ;
+    cout << "Csum " << sum(C,M,N) << "\n" ;
+  }
+#endif // NDEBUG
+
+  // Check for (integer) overflows
+  enforce(M>0);
+  enforce(N>0);
+  enforce(K>0);
+
+  // check_int_mult_overflow(560000,8000); // fails on default int (32-bits)
+  check_int_mult_overflow(M,K);
+  check_int_mult_overflow(N,K);
+  check_int_mult_overflow(M,N);
+
+  cblas_dgemm(Order,TransA,TransB,M,N,K,alpha,A,lda,B,ldb,beta,C,ldc);
+
+#ifndef NDEBUG
+  #ifdef DISABLED
+  if (is_debug_mode()) {
+    printf (" Top left corner of matrix A (cols=k %i, rows=m %i): \n",K,M);
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(K,6); j++) {
+        printf ("%12.0f", A[j+i*K]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix B: \n");
+    for (i=0; i<min(K,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+        printf ("%12.0f", B[j+i*N]);
+      }
+      printf ("\n");
+    }
+
+    printf ("\n Top left corner of matrix C: \n");
+    for (i=0; i<min(M,6); i++) {
+      for (j=0; j<min(N,6); j++) {
+      printf ("%12.5G", C[j+i*N]);
+      }
+      printf ("\n");
+    }
+  }
+  #endif
+#endif // NDEBUG
+}
+
+/*
+    Helper function fast_cblas_dgemm converts a GEMMA layout to cblas_dgemm.
+*/
+static void fast_cblas_dgemm(const char *TransA, const char *TransB, const double alpha,
+                             const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                             gsl_matrix *C) {
+  // C++ is row-major
+  auto transA = (*TransA == 'N' || *TransA == 'n' ? CblasNoTrans : CblasTrans);
+  auto transB = (*TransB == 'N' || *TransB == 'n' ? CblasNoTrans : CblasTrans);
+  const size_t M   = C->size1;
+  const size_t N   = C->size2;
+  const size_t MA  = (transA == CblasNoTrans) ? A->size1 : A->size2;
+  const size_t NA  = (transA == CblasNoTrans) ? A->size2 : A->size1;
+  const size_t MBx = (transB == CblasNoTrans) ? B->size1 : B->size2;
+  const size_t NB  = (transB == CblasNoTrans) ? B->size2 : B->size1;
+
+  if (M == MA && N == NB && NA == MBx) {  /* [MxN] = [MAxNA][MBxNB] */
+
+    auto K = NA;
+
+    // Check for (integer) overflows
+    enforce(M>0);
+    enforce(N>0);
+    enforce(K>0);
+
+    // check_int_mult_overflow(560000,8000);
+    check_int_mult_overflow(M,K);
+    check_int_mult_overflow(N,K);
+    check_int_mult_overflow(M,N);
+
+    cblas_dgemm (CblasRowMajor, transA, transB, M, N, NA,
+                 alpha, A->data, A->tda, B->data, B->tda, beta,
+                 C->data, C->tda);
+
+  } else {
+    fail_msg("Range error in dgemm");
+  }
+}
+
+
+/*
+   Use the fast/supported way to call BLAS dgemm
+*/
+
+void fast_dgemm(const char *TransA, const char *TransB, const double alpha,
+                const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                gsl_matrix *C) {
+  fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C);
+
+#ifdef DISABLE
+  if (is_check_mode()) {
+    // ---- validate with original implementation
+    gsl_matrix *C1 = gsl_matrix_alloc(C->size1,C->size2);
+    eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C1);
+    enforce_msg(gsl_matrix_equal(C,C1),"dgemm outcomes are not equal for fast & eigenlib");
+    gsl_matrix_free(C1);
+  }
+#endif
+}
+
+void fast_eigen_dgemm(const char *TransA, const char *TransB, const double alpha,
+                      const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                      gsl_matrix *C) {
+  if (is_legacy_mode())
+    eigenlib_dgemm(TransA,TransB,alpha,A,B,beta,C);
+  else
+    fast_cblas_dgemm(TransA,TransB,alpha,A,B,beta,C);
+}
diff --git a/src/fastblas.h b/src/fastblas.h
new file mode 100644
index 0000000..6000983
--- /dev/null
+++ b/src/fastblas.h
@@ -0,0 +1,37 @@
+/*
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __FASTBLAS_H__
+#define __FASTBLAS_H__
+
+#include <assert.h>
+#include <iostream>
+#include "gsl/gsl_matrix.h"
+
+gsl_matrix *fast_copy(gsl_matrix *m, const double *mem);
+
+void fast_dgemm(const char *TransA, const char *TransB, const double alpha,
+                const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                gsl_matrix *C);
+void fast_eigen_dgemm(const char *TransA, const char *TransB, const double alpha,
+                      const gsl_matrix *A, const gsl_matrix *B, const double beta,
+                      gsl_matrix *C);
+
+#endif
diff --git a/src/fastopenblas.h b/src/fastopenblas.h
new file mode 100644
index 0000000..3dd8ef7
--- /dev/null
+++ b/src/fastopenblas.h
@@ -0,0 +1,44 @@
+/*
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef __FASTOPENBLAS_H__
+#define __FASTOPENBLAS_H__
+
+#include <assert.h>
+#include <iostream>
+#include <cblas.h>   // For OpenBlas
+#include "gsl/gsl_matrix.h"
+
+void fast_cblas_dgemm(const enum CBLAS_ORDER Order,
+                      const enum CBLAS_TRANSPOSE TransA,
+                      const enum CBLAS_TRANSPOSE TransB,
+                      const size_t M,
+                      const size_t N,
+                      const size_t K,
+                      const double alpha,
+                      const double *A,
+                      const size_t lda,
+                      const double *B,
+                      const size_t ldb,
+                      const double beta,
+                      double *C,
+                      const size_t ldc);
+
+#endif // __FASTOPENBLAS_H_
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 24173c3..edd79d7 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -23,6 +25,17 @@
 #include <iostream>
 #include <string>
 #include <sys/stat.h>
+#ifdef OPENBLAS
+#pragma message "Compiling with OPENBLAS"
+extern "C" {
+  // these functions are defined in cblas.h - but if we include that we
+  // conflicts with other BLAS includes
+  int openblas_get_num_threads(void);
+  int openblas_get_parallel(void);
+  char* openblas_get_config(void);
+  char* openblas_get_corename(void);
+}
+#endif
 
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
@@ -46,10 +59,11 @@
 #include "varcov.h"
 #include "vc.h"
 #include "debug.h"
+#include "version.h"
 
 using namespace std;
 
-GEMMA::GEMMA(void) : version("0.97.3"), date("10/10/2017"), year("2017") {}
+GEMMA::GEMMA(void) : version(GEMMA_VERSION), date(GEMMA_DATE), year(GEMMA_YEAR) {}
 
 void gemma_gsl_error_handler (const char * reason,
                               const char * file,
@@ -59,20 +73,14 @@ void gemma_gsl_error_handler (const char * reason,
   exit(22);
 }
 
+#if defined(OPENBLAS) && !defined(OPENBLAS_LEGACY)
+#include <openblas_config.h>
+#endif
+
 void GEMMA::PrintHeader(void) {
-  cout << endl;
-  cout << "*********************************************************" << endl;
-  cout << "  Genome-wide Efficient Mixed Model Association (GEMMA)  " << endl;
-  cout << "  Version " << version << ", " << date
-       << "                              " << endl;
-  cout << "  Visit http://www.xzlab.org/software.html For Updates   " << endl;
-  cout << "  (C) " << year << " Xiang Zhou                                   "
-       << endl;
-  cout << "  GNU General Public License                             " << endl;
-  cout << "  For Help, Type ./gemma -h                              " << endl;
-  cout << "*********************************************************" << endl;
-  cout << endl;
 
+  cout <<
+    "GEMMA " << version << " (" << date << ") by Xiang Zhou and team (C) 2012-" << year << endl;
   return;
 }
 
@@ -141,22 +149,20 @@ void GEMMA::PrintLicense(void) {
 }
 
 void GEMMA::PrintHelp(size_t option) {
+
   if (option == 0) {
     cout << endl;
-    cout << " GEMMA version " << version << ", released on " << date << endl;
-    cout << " implemented by Xiang Zhou" << endl;
-    cout << endl;
-    cout << " type ./gemma -h [num] for detailed helps" << endl;
+    cout << " type ./gemma -h [num] for detailed help" << endl;
     cout << " options: " << endl;
-    cout << " 1: quick guide" << endl;
-    cout << " 2: file I/O related" << endl;
-    cout << " 3: SNP QC" << endl;
-    cout << " 4: calculate relatedness matrix" << endl;
-    cout << " 5: perform eigen decomposition" << endl;
-    cout << " 6: perform variance component estimation" << endl;
-    cout << " 7: fit a linear model" << endl;
-    cout << " 8: fit a linear mixed model" << endl;
-    cout << " 9: fit a multivariate linear mixed model" << endl;
+    cout << "  1: quick guide" << endl;
+    cout << "  2: file I/O related" << endl;
+    cout << "  3: SNP QC" << endl;
+    cout << "  4: calculate relatedness matrix" << endl;
+    cout << "  5: perform eigen decomposition" << endl;
+    cout << "  6: perform variance component estimation" << endl;
+    cout << "  7: fit a linear model" << endl;
+    cout << "  8: fit a linear mixed model" << endl;
+    cout << "  9: fit a multivariate linear mixed model" << endl;
     cout << " 10: fit a Bayesian sparse linear mixed model" << endl;
     cout << " 11: obtain predicted values" << endl;
     cout << " 12: calculate snp variance covariance" << endl;
@@ -310,11 +316,6 @@ void GEMMA::PrintHelp(size_t option) {
     cout << "                  rs#2, base_position, chr_number" << endl;
     cout << "                  ..." << endl;
 
-    // WJA added.
-    cout << " -oxford    [prefix]       "
-         << " specify input Oxford genotype bgen file prefix." << endl;
-    cout << "          requires: *.bgen, *.sample files" << endl;
-
     cout << " -gxe      [filename]     "
          << " specify input file that contains a column of environmental "
             "factor for g by e tests"
@@ -429,8 +430,8 @@ void GEMMA::PrintHelp(size_t option) {
             "default 1)"
          << endl;
     cout << " -pace     [num]          "
-         << " specify terminal display update pace (default 100000 SNPs or "
-            "100000 iterations)."
+         << " specify terminal display update pace (default 1,000 SNPs or "
+            "1,000 iterations)."
          << endl;
     cout << " -outdir   [path]         "
          << " specify output directory path (default \"./output/\")" << endl;
@@ -542,9 +543,13 @@ void GEMMA::PrintHelp(size_t option) {
     cout << " -lmax     [num]          "
          << " specify maximum value for lambda (default 1e+5)" << endl;
     cout
-        << " -region   [num]          "
+        << " -region    [num]          "
         << " specify the number of regions used to evaluate lambda (default 10)"
         << endl;
+    cout << " -loco     [chr]          "
+         << " leave one chromosome out (LOCO) by name (requires -a annotation "
+            "file)"
+         << endl;
     cout << endl;
   }
 
@@ -715,9 +720,14 @@ void GEMMA::PrintHelp(size_t option) {
     cout << " -debug                   debug output" << endl;
     cout << " -nind       [num]        read up to num individuals" << endl;
     cout << " -issue      [num]        enable tests relevant to issue tracker" << endl;
+    cout << " -legacy                  run gemma in legacy mode" << endl;
     cout << endl;
   }
 
+  cout << "The GEMMA software is distributed under the GNU General Public v3" << endl;
+  cout << "   -license    show license information" << endl;
+  cout <<
+    "   see also http://www.xzlab.org/software.html, https://github.com/genetics-statistics" << endl;
   return;
 }
 
@@ -759,8 +769,8 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       str.clear();
       str.assign(argv[i]);
       cPar.file_mbfile = str;
-    } else if (strcmp(argv[i], "-silence") == 0) {
-      cPar.mode_silence = true;
+    } else if (strcmp(argv[i], "-silence") == 0 || strcmp(argv[i], "--quiet") == 0) {
+      debug_set_quiet_mode(true);
     } else if (strcmp(argv[i], "-g") == 0) {
       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
         continue;
@@ -793,18 +803,6 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       str.clear();
       str.assign(argv[i]);
       cPar.file_anno = str;
-    }
-
-    // WJA added.
-    else if (strcmp(argv[i], "-oxford") == 0 ||
-             strcmp(argv[i], "--oxford") == 0 || strcmp(argv[i], "-x") == 0) {
-      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
-        continue;
-      }
-      ++i;
-      str.clear();
-      str.assign(argv[i]);
-      cPar.file_oxford = str;
     } else if (strcmp(argv[i], "-gxe") == 0) {
       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
         continue;
@@ -1373,8 +1371,9 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       ++i;
       str.clear();
       str.assign(argv[i]);
-      cPar.issue = atoi(str.c_str()); // for testing purposes
-      enforce(cPar.issue > 0);
+      auto issue = atoi(str.c_str()); // for testing purposes
+      enforce(issue > 0);
+      debug_set_issue(issue);
     } else if (strcmp(argv[i], "-emp") == 0) {
       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
         continue;
@@ -1594,11 +1593,17 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       str.assign(argv[i]);
       cPar.window_ns = atoi(str.c_str());
     } else if (strcmp(argv[i], "-debug") == 0) {
-      cPar.mode_debug = true;
+      // cPar.mode_debug = true;
+      debug_set_debug_mode(true);
     } else if (strcmp(argv[i], "-no-check") == 0) {
-      cPar.mode_check = false;
+      // cPar.mode_check = false;
+      debug_set_no_check_mode(true);
     } else if (strcmp(argv[i], "-strict") == 0) {
-      cPar.mode_strict = true;
+      // cPar.mode_strict = true;
+      debug_set_strict_mode(true);
+    } else if (strcmp(argv[i], "-legacy") == 0) {
+      debug_set_legacy_mode(true);
+      warning_msg("you are running in legacy mode - support may drop in future versions of gemma");
     } else {
       cout << "error! unrecognized option: " << argv[i] << endl;
       cPar.error = true;
@@ -1635,7 +1640,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
   if (cPar.a_mode == 41 || cPar.a_mode == 42) {
     gsl_vector *y_prdt;
 
-    y_prdt = gsl_vector_alloc(cPar.ni_total - cPar.ni_test);
+    y_prdt = gsl_vector_safe_alloc(cPar.ni_total - cPar.ni_test);
 
     // set to zero
     gsl_vector_set_zero(y_prdt);
@@ -1647,8 +1652,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
     if (!cPar.file_kin.empty() && !cPar.file_ebv.empty()) {
       cout << "Adding Breeding Values ... " << endl;
 
-      gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
-      gsl_vector *u_hat = gsl_vector_alloc(cPar.ni_test);
+      gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
+      gsl_vector *u_hat = gsl_vector_safe_alloc(cPar.ni_test);
 
       // read kinship matrix and set u_hat
       vector<int> indicator_all;
@@ -1671,8 +1676,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
       // read u
       cPRDT.AddBV(G, u_hat, y_prdt);
 
-      gsl_matrix_free(G);
-      gsl_vector_free(u_hat);
+      gsl_matrix_safe_free(G);
+      gsl_vector_safe_free(u_hat);
     }
 
     // add beta
@@ -1699,32 +1704,32 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
     cPRDT.WriteFiles(y_prdt);
 
-    gsl_vector_free(y_prdt);
+    gsl_vector_safe_free(y_prdt);
   }
 
   // Prediction with kinship matrix only; for one or more phenotypes
   if (cPar.a_mode == 43) {
     // first, use individuals with full phenotypes to obtain estimates of Vg and
     // Ve
-    gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
-    gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
-    gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
-    gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
-    gsl_matrix *UtW = gsl_matrix_alloc(Y->size1, W->size2);
-    gsl_matrix *UtY = gsl_matrix_alloc(Y->size1, Y->size2);
-    gsl_vector *eval = gsl_vector_alloc(Y->size1);
+    gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+    gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
+    gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
+    gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
+    gsl_matrix *UtW = gsl_matrix_safe_alloc(Y->size1, W->size2);
+    gsl_matrix *UtY = gsl_matrix_safe_alloc(Y->size1, Y->size2);
+    gsl_vector *eval = gsl_vector_safe_alloc(Y->size1);
 
-    gsl_matrix *Y_full = gsl_matrix_alloc(cPar.ni_cvt, cPar.n_ph);
-    gsl_matrix *W_full = gsl_matrix_alloc(Y_full->size1, cPar.n_cvt);
+    gsl_matrix *Y_full = gsl_matrix_safe_alloc(cPar.ni_cvt, cPar.n_ph);
+    gsl_matrix *W_full = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_cvt);
 
     // set covariates matrix W and phenotype matrix Y
     // an intercept should be included in W,
     cPar.CopyCvtPhen(W, Y, 0);
     cPar.CopyCvtPhen(W_full, Y_full, 1);
 
-    gsl_matrix *Y_hat = gsl_matrix_alloc(Y_full->size1, cPar.n_ph);
-    gsl_matrix *G_full = gsl_matrix_alloc(Y_full->size1, Y_full->size1);
-    gsl_matrix *H_full = gsl_matrix_alloc(Y_full->size1 * Y_hat->size2,
+    gsl_matrix *Y_hat = gsl_matrix_safe_alloc(Y_full->size1, cPar.n_ph);
+    gsl_matrix *G_full = gsl_matrix_safe_alloc(Y_full->size1, Y_full->size1);
+    gsl_matrix *H_full = gsl_matrix_safe_alloc(Y_full->size1 * Y_hat->size2,
                                           Y_full->size1 * Y_hat->size2);
 
     // read relatedness matrix G, and matrix G_full
@@ -1745,7 +1750,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     // center matrix G
     CenterMatrix(G);
     CenterMatrix(G_full);
-    validate_K(G,cPar.mode_check,cPar.mode_strict);
+    validate_K(G);
 
     // eigen-decomposition and calculate trace_G
     cout << "Start Eigen-Decomposition..." << endl;
@@ -1760,8 +1765,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
     // calculate variance component and beta estimates
     // and then obtain predicted values
     if (cPar.n_ph == 1) {
-      gsl_vector *beta = gsl_vector_alloc(W->size2);
-      gsl_vector *se_beta = gsl_vector_alloc(W->size2);
+      gsl_vector *beta = gsl_vector_safe_alloc(W->size2);
+      gsl_vector *se_beta = gsl_vector_safe_alloc(W->size2);
 
       double lambda, logl, vg, ve;
       gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
@@ -1788,13 +1793,13 @@ void GEMMA::BatchRun(PARAM &cPar) {
       gsl_matrix_add(H_full, G_full);
 
       // free matrices
-      gsl_vector_free(beta);
-      gsl_vector_free(se_beta);
+      gsl_vector_safe_free(beta);
+      gsl_vector_safe_free(se_beta);
     } else {
-      gsl_matrix *Vg = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
-      gsl_matrix *Ve = gsl_matrix_alloc(cPar.n_ph, cPar.n_ph);
-      gsl_matrix *B = gsl_matrix_alloc(cPar.n_ph, W->size2);
-      gsl_matrix *se_B = gsl_matrix_alloc(cPar.n_ph, W->size2);
+      gsl_matrix *Vg = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
+      gsl_matrix *Ve = gsl_matrix_safe_alloc(cPar.n_ph, cPar.n_ph);
+      gsl_matrix *B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
+      gsl_matrix *se_B = gsl_matrix_safe_alloc(cPar.n_ph, W->size2);
 
       // obtain estimates
       CalcMvLmmVgVeBeta(eval, UtW, UtY, cPar.em_iter, cPar.nr_iter,
@@ -1836,10 +1841,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
       }
 
       // free matrices
-      gsl_matrix_free(Vg);
-      gsl_matrix_free(Ve);
-      gsl_matrix_free(B);
-      gsl_matrix_free(se_B);
+      gsl_matrix_safe_free(Vg);
+      gsl_matrix_safe_free(Ve);
+      gsl_matrix_safe_free(B);
+      gsl_matrix_safe_free(se_B);
     }
 
     PRDT cPRDT;
@@ -1853,26 +1858,26 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
     cPRDT.WriteFiles(Y_full);
 
-    gsl_matrix_free(Y);
-    gsl_matrix_free(W);
-    gsl_matrix_free(G);
-    gsl_matrix_free(U);
-    gsl_matrix_free(UtW);
-    gsl_matrix_free(UtY);
-    gsl_vector_free(eval);
-
-    gsl_matrix_free(Y_full);
-    gsl_matrix_free(Y_hat);
-    gsl_matrix_free(W_full);
-    gsl_matrix_free(G_full);
-    gsl_matrix_free(H_full);
+    gsl_matrix_safe_free(Y);
+    gsl_matrix_safe_free(W);
+    gsl_matrix_safe_free(G);
+    gsl_matrix_safe_free(U);
+    gsl_matrix_safe_free(UtW);
+    gsl_matrix_safe_free(UtY);
+    gsl_vector_safe_free(eval);
+
+    gsl_matrix_safe_free(Y_full);
+    gsl_matrix_safe_free(Y_hat);
+    gsl_matrix_safe_free(W_full);
+    gsl_matrix_safe_free(G_full);
+    gsl_matrix_safe_free(H_full);
   }
 
   // Generate Kinship matrix (optionally using LOCO)
   if (cPar.a_mode == 21 || cPar.a_mode == 22) {
     cout << "Calculating Relatedness Matrix ... " << endl;
 
-    gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+    gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
     enforce_msg(G, "allocate G"); // just to be sure
 
     time_start = clock();
@@ -1885,7 +1890,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     }
 
     // Now we have the Kinship matrix test it
-    validate_K(G,cPar.mode_check,cPar.mode_strict);
+    validate_K(G);
 
     if (cPar.a_mode == 21) {
       cPar.WriteMatrix(G, "cXX");
@@ -1893,7 +1898,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.WriteMatrix(G, "sXX");
     }
 
-    gsl_matrix_free(G);
+    gsl_matrix_safe_free(G);
   }
 
   // Compute the LDSC weights (not implemented yet)
@@ -1917,8 +1922,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
   if (cPar.a_mode == 25 || cPar.a_mode == 26) {
     cout << "Calculating the S Matrix ... " << endl;
 
-    gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
-    gsl_vector *ns = gsl_vector_alloc(cPar.n_vc + 1);
+    gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
+    gsl_vector *ns = gsl_vector_safe_alloc(cPar.n_vc + 1);
     gsl_matrix_set_zero(S);
     gsl_vector_set_zero(ns);
 
@@ -1927,13 +1932,13 @@ void GEMMA::BatchRun(PARAM &cPar) {
         gsl_matrix_submatrix(S, cPar.n_vc, 0, cPar.n_vc, cPar.n_vc);
     gsl_vector_view ns_vec = gsl_vector_subvector(ns, 0, cPar.n_vc);
 
-    gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
-    gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+    gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+    gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
     gsl_matrix_set_zero(K);
     gsl_matrix_set_zero(A);
 
-    gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
-    gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+    gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+    gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
 
     cPar.CopyCvtPhen(W, y, 0);
 
@@ -1957,22 +1962,22 @@ void GEMMA::BatchRun(PARAM &cPar) {
     cPar.WriteVector(ns, "size");
     cPar.WriteVar("snps");
 
-    gsl_matrix_free(S);
-    gsl_vector_free(ns);
+    gsl_matrix_safe_free(S);
+    gsl_vector_safe_free(ns);
 
-    gsl_matrix_free(A);
-    gsl_matrix_free(K);
+    gsl_matrix_safe_free(A);
+    gsl_matrix_safe_free(K);
 
-    gsl_vector_free(y);
-    gsl_matrix_free(K);
+    gsl_vector_safe_free(y);
+    gsl_matrix_safe_free(K);
   }
 
   // Compute the q vector, that is used for variance component estimation using
   // summary statistics
   if (cPar.a_mode == 27 || cPar.a_mode == 28) {
-    gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
-    gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
-    gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+    gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+    gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
+    gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
     gsl_vector_set_zero(q);
     gsl_vector_set_zero(s);
 
@@ -1988,8 +1993,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
                   vec_weight, vec_z2, cPar.ni_total, cPar.ns_total,
                   cPar.ns_test);
     cout << "## number of total individuals = " << cPar.ni_total << endl;
-    cout << "## number of total SNPs = " << cPar.ns_total << endl;
-    cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    cout << "## number of total SNPs/var = " << cPar.ns_total << endl;
+    cout << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
     cout << "## number of variance components = " << cPar.n_vc << endl;
     cout << "Calculating the q vector ... " << endl;
     Calcq(cPar.n_block, vec_cat, vec_ni, vec_weight, vec_z2, Vq, q,
@@ -2006,9 +2011,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
     cPar.WriteMatrix(Vq, "Vq");
     cPar.WriteVector(q, "q");
     cPar.WriteVector(s, "size");
-    gsl_matrix_free(Vq);
-    gsl_vector_free(q);
-    gsl_vector_free(s);
+    gsl_matrix_safe_free(Vq);
+    gsl_vector_safe_free(q);
+    gsl_vector_safe_free(s);
   }
 
   // Calculate SNP covariance.
@@ -2028,8 +2033,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
   // LM.
   if (cPar.a_mode == 51 || cPar.a_mode == 52 || cPar.a_mode == 53 ||
       cPar.a_mode == 54) { // Fit LM
-    gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
-    gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
+    gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+    gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
 
     // set covariates matrix W and phenotype matrix Y
     // an intercept should be included in W,
@@ -2047,8 +2052,6 @@ void GEMMA::BatchRun(PARAM &cPar) {
                         &Y_col.vector); // y is the predictor, not the phenotype
       } else if (!cPar.file_bfile.empty()) {
         cLm.AnalyzePlink(W, &Y_col.vector);
-      } else if (!cPar.file_oxford.empty()) {
-        cLm.Analyzebgen(W, &Y_col.vector);
       } else {
         cLm.AnalyzeBimbam(W, &Y_col.vector);
       }
@@ -2057,8 +2060,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cLm.CopyToParam(cPar);
     }
     // release all matrices and vectors
-    gsl_matrix_free(Y);
-    gsl_matrix_free(W);
+    gsl_matrix_safe_free(Y);
+    gsl_matrix_safe_free(W);
   }
 
   // VC estimation with one or multiple kinship matrices
@@ -2083,16 +2086,16 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.UpdateSNP(mapRS2wK);
 
       // Setup matrices and vectors.
-      gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc * 2, cPar.n_vc);
-      gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
-      gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
-      gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+      gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc * 2, cPar.n_vc);
+      gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+      gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
+      gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
 
-      gsl_matrix *K = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
-      gsl_matrix *A = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+      gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
+      gsl_matrix *A = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc * cPar.ni_test);
 
-      gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
-      gsl_matrix *W = gsl_matrix_alloc(cPar.ni_test, cPar.n_cvt);
+      gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+      gsl_matrix *W = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_cvt);
 
       gsl_matrix_set_zero(K);
       gsl_matrix_set_zero(A);
@@ -2120,8 +2123,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
       cout << "Study Panel: " << endl;
       cout << "## number of total individuals = " << cPar.ni_study << endl;
-      cout << "## number of total SNPs = " << cPar.ns_study << endl;
-      cout << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+      cout << "## number of total SNPs/var = " << cPar.ns_study << endl;
+      cout << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
       cout << "## number of variance components = " << cPar.n_vc << endl;
 
       // compute q
@@ -2186,15 +2189,15 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.WriteVector(q, "q");
       cPar.WriteVector(s, "size");
 
-      gsl_matrix_free(S);
-      gsl_matrix_free(Vq);
-      gsl_vector_free(q);
-      gsl_vector_free(s);
+      gsl_matrix_safe_free(S);
+      gsl_matrix_safe_free(Vq);
+      gsl_vector_safe_free(q);
+      gsl_vector_safe_free(s);
 
-      gsl_matrix_free(A);
-      gsl_matrix_free(K);
-      gsl_vector_free(y);
-      gsl_matrix_free(W);
+      gsl_matrix_safe_free(A);
+      gsl_matrix_safe_free(K);
+      gsl_vector_safe_free(y);
+      gsl_matrix_safe_free(W);
     } else if (!cPar.file_study.empty() || !cPar.file_mstudy.empty()) {
       if (!cPar.file_study.empty()) {
         string sfile = cPar.file_study + ".size.txt";
@@ -2219,16 +2222,16 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
       cPar.n_vc = cPar.n_vc - 1;
 
-      gsl_matrix *S = gsl_matrix_alloc(2 * cPar.n_vc, cPar.n_vc);
-      gsl_matrix *Vq = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
-      // gsl_matrix *V=gsl_matrix_alloc (cPar.n_vc+1,
+      gsl_matrix *S = gsl_matrix_safe_alloc(2 * cPar.n_vc, cPar.n_vc);
+      gsl_matrix *Vq = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+      // gsl_matrix *V=gsl_matrix_safe_alloc (cPar.n_vc+1,
       // (cPar.n_vc*(cPar.n_vc+1))/2*(cPar.n_vc+1) );
-      // gsl_matrix *Vslope=gsl_matrix_alloc (n_lines+1,
+      // gsl_matrix *Vslope=gsl_matrix_safe_alloc (n_lines+1,
       // (n_lines*(n_lines+1))/2*(n_lines+1) );
-      gsl_vector *q = gsl_vector_alloc(cPar.n_vc);
-      gsl_vector *s_study = gsl_vector_alloc(cPar.n_vc);
-      gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
-      gsl_vector *s = gsl_vector_alloc(cPar.n_vc + 1);
+      gsl_vector *q = gsl_vector_safe_alloc(cPar.n_vc);
+      gsl_vector *s_study = gsl_vector_safe_alloc(cPar.n_vc);
+      gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
+      gsl_vector *s = gsl_vector_safe_alloc(cPar.n_vc + 1);
 
       gsl_matrix_set_zero(S);
       gsl_matrix_view S_mat =
@@ -2270,7 +2273,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       assert(!has_nan(cPar.v_se_pve));
 
       gsl_vector_view s_sub = gsl_vector_subvector(s, 0, cPar.n_vc);
-      gsl_vector_memcpy(&s_sub.vector, s_ref);
+      gsl_vector_safe_memcpy(&s_sub.vector, s_ref);
       gsl_vector_set(s, cPar.n_vc, cPar.ni_ref);
 
       cPar.WriteMatrix(S, "S");
@@ -2278,18 +2281,18 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.WriteVector(q, "q");
       cPar.WriteVector(s, "size");
 
-      gsl_matrix_free(S);
-      gsl_matrix_free(Vq);
-      // gsl_matrix_free (V);
-      // gsl_matrix_free (Vslope);
-      gsl_vector_free(q);
-      gsl_vector_free(s_study);
-      gsl_vector_free(s_ref);
-      gsl_vector_free(s);
+      gsl_matrix_safe_free(S);
+      gsl_matrix_safe_free(Vq);
+      // gsl_matrix_safe_free (V);
+      // gsl_matrix_safe_free (Vslope);
+      gsl_vector_safe_free(q);
+      gsl_vector_safe_free(s_study);
+      gsl_vector_safe_free(s_ref);
+      gsl_vector_safe_free(s);
     } else {
-      gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
-      gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
-      gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1 * cPar.n_vc);
+      gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+      gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
+      gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1 * cPar.n_vc);
 
       // set covariates matrix W and phenotype matrix Y
       // an intercept should be included in W,
@@ -2328,7 +2331,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         // center matrix G
         CenterMatrix(G);
-        validate_K(G,cPar.mode_check,cPar.mode_strict);
+        validate_K(G);
 
         (cPar.v_traceG).clear();
         double d = 0;
@@ -2366,9 +2369,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
   // the genotypes
   if (cPar.a_mode == 66 || cPar.a_mode == 67) {
     // read reference file first
-    gsl_matrix *S = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
-    gsl_matrix *Svar = gsl_matrix_alloc(cPar.n_vc, cPar.n_vc);
-    gsl_vector *s_ref = gsl_vector_alloc(cPar.n_vc);
+    gsl_matrix *S = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+    gsl_matrix *Svar = gsl_matrix_safe_alloc(cPar.n_vc, cPar.n_vc);
+    gsl_vector *s_ref = gsl_vector_safe_alloc(cPar.n_vc);
 
     gsl_matrix_set_zero(S);
     gsl_matrix_set_zero(Svar);
@@ -2393,14 +2396,14 @@ void GEMMA::BatchRun(PARAM &cPar) {
     cPar.ObtainWeight(setSnps_beta, mapRS2wK);
 
     // set up matrices and vector
-    gsl_matrix *Xz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
-    gsl_matrix *XWz = gsl_matrix_alloc(cPar.ni_test, cPar.n_vc);
+    gsl_matrix *Xz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
+    gsl_matrix *XWz = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_vc);
     gsl_matrix *XtXWz =
-        gsl_matrix_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
-    gsl_vector *w = gsl_vector_alloc(mapRS2wK.size());
-    gsl_vector *w1 = gsl_vector_alloc(mapRS2wK.size());
-    gsl_vector *z = gsl_vector_alloc(mapRS2wK.size());
-    gsl_vector *s_vec = gsl_vector_alloc(cPar.n_vc);
+        gsl_matrix_safe_alloc(mapRS2wK.size(), cPar.n_vc * cPar.n_vc);
+    gsl_vector *w = gsl_vector_safe_alloc(mapRS2wK.size());
+    gsl_vector *w1 = gsl_vector_safe_alloc(mapRS2wK.size());
+    gsl_vector *z = gsl_vector_safe_alloc(mapRS2wK.size());
+    gsl_vector *s_vec = gsl_vector_safe_alloc(cPar.n_vc);
 
     vector<size_t> vec_cat, vec_size;
     vector<double> vec_z;
@@ -2462,7 +2465,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
                cPar.mindicator_snp, vec_cat, w1, z, Xz);
     }
     if (cPar.a_mode == 66) {
-      gsl_matrix_memcpy(XWz, Xz);
+      gsl_matrix_safe_memcpy(XWz, Xz);
     } else if (cPar.a_mode == 67) {
       cout << "Calculating XWz ... " << endl;
 
@@ -2507,37 +2510,37 @@ void GEMMA::BatchRun(PARAM &cPar) {
              cPar.v_se_sigma2, cPar.v_enrich, cPar.v_se_enrich);
     assert(!has_nan(cPar.v_se_pve));
 
-    gsl_matrix_free(S);
-    gsl_matrix_free(Svar);
-    gsl_vector_free(s_ref);
-
-    gsl_matrix_free(Xz);
-    gsl_matrix_free(XWz);
-    gsl_matrix_free(XtXWz);
-    gsl_vector_free(w);
-    gsl_vector_free(w1);
-    gsl_vector_free(z);
-    gsl_vector_free(s_vec);
+    gsl_matrix_safe_free(S);
+    gsl_matrix_safe_free(Svar);
+    gsl_vector_safe_free(s_ref);
+
+    gsl_matrix_safe_free(Xz);
+    gsl_matrix_safe_free(XWz);
+    gsl_matrix_safe_free(XtXWz);
+    gsl_vector_safe_free(w);
+    gsl_vector_safe_free(w1);
+    gsl_vector_safe_free(z);
+    gsl_vector_safe_free(s_vec);
   }
 
   // LMM or mvLMM or Eigen-Decomposition
   if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
       cPar.a_mode == 4 || cPar.a_mode == 5 ||
       cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen
-    gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+    gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
     enforce_msg(Y, "allocate Y"); // just to be sure
-    gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
-    gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
+    gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
+    gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c
                                                           // matrix
-    gsl_matrix *se_B = gsl_matrix_alloc(Y->size2, W->size2);
-    gsl_matrix *G = gsl_matrix_alloc(Y->size1, Y->size1);
-    gsl_matrix *U = gsl_matrix_alloc(Y->size1, Y->size1);
+    gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2);
+    gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
+    gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
     gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2);
     gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2);
     gsl_vector *eval = gsl_vector_calloc(Y->size1);
-    gsl_vector *env = gsl_vector_alloc(Y->size1);
-    gsl_vector *weight = gsl_vector_alloc(Y->size1);
-    assert_issue(cPar.issue == 26, UtY->data[0] == 0.0);
+    gsl_vector *env = gsl_vector_safe_alloc(Y->size1);
+    gsl_vector *weight = gsl_vector_safe_alloc(Y->size1);
+    assert_issue(is_issue(26), UtY->data[0] == 0.0);
 
     // set covariates matrix W and phenotype matrix Y
     // an intercept should be included in W,
@@ -2557,7 +2560,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
       // center matrix G
       CenterMatrix(G);
-      validate_K(G,cPar.mode_check,cPar.mode_strict);
+      validate_K(G);
 
       // is residual weights are provided, then
       if (!cPar.file_weight.empty()) {
@@ -2638,7 +2641,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       CalcUtX(U, W, UtW);
       CalcUtX(U, Y, UtY);
 
-      assert_issue(cPar.issue == 26, ROUND(UtY->data[0]) == -16.6143);
+      assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
 
       LMM cLmm;
       cLmm.CopyFromParam(cPar);
@@ -2655,7 +2658,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       // calculate UtW and Uty
       CalcUtX(U, W, UtW);
       CalcUtX(U, Y, UtY);
-      assert_issue(cPar.issue == 26, ROUND(UtY->data[0]) == -16.6143);
+      assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
 
       // calculate REMLE/MLE estimate and pve for univariate model
       if (cPar.n_ph == 1) { // one phenotype
@@ -2663,31 +2666,27 @@ void GEMMA::BatchRun(PARAM &cPar) {
         gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
         gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
 
-        assert_issue(cPar.issue == 26, ROUND(UtY->data[0]) == -16.6143);
+        assert_issue(is_issue(26), ROUND(UtY->data[0]) == -16.6143);
 
         CalcLambda('L', eval, UtW, &UtY_col.vector, cPar.l_min, cPar.l_max,
                    cPar.n_region, cPar.l_mle_null, cPar.logl_mle_H0);
         assert(!std::isnan(UtY->data[0]));
-        assert(!std::isnan(B->data[0]));
-        assert(!std::isnan(se_B->data[0]));
 
         CalcLmmVgVeBeta(eval, UtW, &UtY_col.vector, cPar.l_mle_null,
                         cPar.vg_mle_null, cPar.ve_mle_null, &beta.vector,
                         &se_beta.vector);
 
         assert(!std::isnan(UtY->data[0]));
-        assert(!std::isnan(B->data[0]));
-        assert(!std::isnan(se_B->data[0]));
 
         cPar.beta_mle_null.clear();
         cPar.se_beta_mle_null.clear();
+        assert(!std::isnan(B->data[0]));
+        assert(!std::isnan(se_B->data[0]));
         for (size_t i = 0; i < B->size2; i++) {
           cPar.beta_mle_null.push_back(gsl_matrix_get(B, 0, i));
           cPar.se_beta_mle_null.push_back(gsl_matrix_get(se_B, 0, i));
         }
         assert(!std::isnan(UtY->data[0]));
-        assert(!std::isnan(B->data[0]));
-        assert(!std::isnan(se_B->data[0]));
         assert(!std::isnan(cPar.beta_mle_null.front()));
         assert(!std::isnan(cPar.se_beta_mle_null.front()));
 
@@ -2699,6 +2698,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         cPar.beta_remle_null.clear();
         cPar.se_beta_remle_null.clear();
+        assert(!std::isnan(B->data[0]));
+        assert(!std::isnan(se_B->data[0]));
+
         for (size_t i = 0; i < B->size2; i++) {
           cPar.beta_remle_null.push_back(gsl_matrix_get(B, 0, i));
           cPar.se_beta_remle_null.push_back(gsl_matrix_get(se_B, 0, i));
@@ -2710,14 +2712,14 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         // calculate and output residuals
         if (cPar.a_mode == 5) {
-          gsl_vector *Utu_hat = gsl_vector_alloc(Y->size1);
-          gsl_vector *Ute_hat = gsl_vector_alloc(Y->size1);
-          gsl_vector *u_hat = gsl_vector_alloc(Y->size1);
-          gsl_vector *e_hat = gsl_vector_alloc(Y->size1);
-          gsl_vector *y_hat = gsl_vector_alloc(Y->size1);
+          gsl_vector *Utu_hat = gsl_vector_safe_alloc(Y->size1);
+          gsl_vector *Ute_hat = gsl_vector_safe_alloc(Y->size1);
+          gsl_vector *u_hat = gsl_vector_safe_alloc(Y->size1);
+          gsl_vector *e_hat = gsl_vector_safe_alloc(Y->size1);
+          gsl_vector *y_hat = gsl_vector_safe_alloc(Y->size1);
 
           // obtain Utu and Ute
-          gsl_vector_memcpy(y_hat, &UtY_col.vector);
+          gsl_vector_safe_memcpy(y_hat, &UtY_col.vector);
           gsl_blas_dgemv(CblasNoTrans, -1.0, UtW, &beta.vector, 1.0, y_hat);
 
           double d, u, e;
@@ -2738,9 +2740,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
           cPar.WriteVector(u_hat, "residU");
           cPar.WriteVector(e_hat, "residE");
 
-          gsl_vector_free(u_hat);
-          gsl_vector_free(e_hat);
-          gsl_vector_free(y_hat);
+          gsl_vector_safe_free(u_hat);
+          gsl_vector_safe_free(e_hat);
+          gsl_vector_safe_free(y_hat);
         }
       }
 
@@ -2755,18 +2757,18 @@ void GEMMA::BatchRun(PARAM &cPar) {
           gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
 
           if (!cPar.file_bfile.empty()) {
+            // PLINK analysis
             if (cPar.file_gxe.empty()) {
               cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
-                                &Y_col.vector);
-            } else {
+                                &Y_col.vector, cPar.setGWASnps);
+            }
+            else {
               cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
                                    &Y_col.vector, env);
             }
           }
-          // WJA added
-          else if (!cPar.file_oxford.empty()) {
-            cLmm.Analyzebgen(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector);
-          } else {
+          else {
+            // BIMBAM analysis
             if (cPar.file_gxe.empty()) {
               cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
                                  &Y_col.vector, cPar.setGWASnps);
@@ -2788,8 +2790,6 @@ void GEMMA::BatchRun(PARAM &cPar) {
             } else {
               cMvlmm.AnalyzePlinkGXE(U, eval, UtW, UtY, env);
             }
-          } else if (!cPar.file_oxford.empty()) {
-            cMvlmm.Analyzebgen(U, eval, UtW, UtY);
           } else {
             if (cPar.file_gxe.empty()) {
               cMvlmm.AnalyzeBimbam(U, eval, UtW, UtY);
@@ -2805,24 +2805,24 @@ void GEMMA::BatchRun(PARAM &cPar) {
     }
 
     // release all matrices and vectors
-    gsl_matrix_free(Y);
-    gsl_matrix_free(W);
-    gsl_matrix_free(B);
-    gsl_matrix_free(se_B);
-    gsl_matrix_free(G);
-    gsl_matrix_free(U);
-    gsl_matrix_free(UtW);
-    gsl_matrix_free(UtY);
-    gsl_vector_free(eval);
-    gsl_vector_free(env);
+    gsl_matrix_safe_free(Y);
+    gsl_matrix_safe_free(W);
+    gsl_matrix_safe_free(B);
+    gsl_matrix_safe_free(se_B);
+    gsl_matrix_safe_free(G);
+    gsl_matrix_safe_free(U);
+    gsl_matrix_safe_free(UtW);
+    gsl_matrix_safe_free(UtY);
+    gsl_vector_safe_free(eval);
+    gsl_vector_safe_free(env);
   }
 
   // BSLMM
   if (cPar.a_mode == 11 || cPar.a_mode == 12 || cPar.a_mode == 13) {
-    gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
-    gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
-    gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
-    gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+    gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+    gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
+    gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
+    gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
 
     // set covariates matrix W and phenotype vector y
     // an intercept should be included in W,
@@ -2845,10 +2845,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cBslmm.CopyToParam(cPar);
       // else, if rho!=1
     } else {
-      gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
-      gsl_vector *eval = gsl_vector_alloc(y->size);
-      gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
-      gsl_vector *Uty = gsl_vector_alloc(y->size);
+      gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
+      gsl_vector *eval = gsl_vector_safe_alloc(y->size);
+      gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
+      gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
 
       // read relatedness matrix G
       if (!(cPar.file_kin).empty()) {
@@ -2864,7 +2864,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         // center matrix G
         CenterMatrix(G);
-        validate_K(G,cPar.mode_check,cPar.mode_strict);
+        validate_K(G);
       } else {
         cPar.ReadGenotypes(UtX, G, true);
       }
@@ -2915,24 +2915,24 @@ void GEMMA::BatchRun(PARAM &cPar) {
       }
 
       // release all matrices and vectors
-      gsl_matrix_free(G);
-      gsl_matrix_free(U);
-      gsl_matrix_free(UtW);
-      gsl_vector_free(eval);
-      gsl_vector_free(Uty);
+      gsl_matrix_safe_free(G);
+      gsl_matrix_safe_free(U);
+      gsl_matrix_safe_free(UtW);
+      gsl_vector_safe_free(eval);
+      gsl_vector_safe_free(Uty);
     }
-    gsl_matrix_free(W);
-    gsl_vector_free(y);
-    gsl_matrix_free(UtX);
+    gsl_matrix_safe_free(W);
+    gsl_vector_safe_free(y);
+    gsl_matrix_safe_free(UtX);
   }
 
   // BSLMM-DAP
   if (cPar.a_mode == 14 || cPar.a_mode == 15 || cPar.a_mode == 16) {
     if (cPar.a_mode == 14) {
-      gsl_vector *y = gsl_vector_alloc(cPar.ni_test);
-      gsl_matrix *W = gsl_matrix_alloc(y->size, cPar.n_cvt);
-      gsl_matrix *G = gsl_matrix_alloc(y->size, y->size);
-      gsl_matrix *UtX = gsl_matrix_alloc(y->size, cPar.ns_test);
+      gsl_vector *y = gsl_vector_safe_alloc(cPar.ni_test);
+      gsl_matrix *W = gsl_matrix_safe_alloc(y->size, cPar.n_cvt);
+      gsl_matrix *G = gsl_matrix_safe_alloc(y->size, y->size);
+      gsl_matrix *UtX = gsl_matrix_safe_alloc(y->size, cPar.ns_test);
 
       // set covariates matrix W and phenotype vector y
       // an intercept should be included in W,
@@ -2956,10 +2956,10 @@ void GEMMA::BatchRun(PARAM &cPar) {
         cBslmm.CopyToParam(cPar);
         // else, if rho!=1
       } else {
-        gsl_matrix *U = gsl_matrix_alloc(y->size, y->size);
-        gsl_vector *eval = gsl_vector_alloc(y->size);
-        gsl_matrix *UtW = gsl_matrix_alloc(y->size, W->size2);
-        gsl_vector *Uty = gsl_vector_alloc(y->size);
+        gsl_matrix *U = gsl_matrix_safe_alloc(y->size, y->size);
+        gsl_vector *eval = gsl_vector_safe_alloc(y->size);
+        gsl_matrix *UtW = gsl_matrix_safe_alloc(y->size, W->size2);
+        gsl_vector *Uty = gsl_vector_safe_alloc(y->size);
 
         // read relatedness matrix G
         if (!(cPar.file_kin).empty()) {
@@ -2975,7 +2975,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
           // center matrix G
           CenterMatrix(G);
-          validate_K(G,cPar.mode_check,cPar.mode_strict);
+          validate_K(G);
 
         } else {
           cPar.ReadGenotypes(UtX, G, true);
@@ -3019,16 +3019,16 @@ void GEMMA::BatchRun(PARAM &cPar) {
         cBslmmDap.CopyToParam(cPar);
 
         // release all matrices and vectors
-        gsl_matrix_free(G);
-        gsl_matrix_free(U);
-        gsl_matrix_free(UtW);
-        gsl_vector_free(eval);
-        gsl_vector_free(Uty);
+        gsl_matrix_safe_free(G);
+        gsl_matrix_safe_free(U);
+        gsl_matrix_safe_free(UtW);
+        gsl_vector_safe_free(eval);
+        gsl_vector_safe_free(Uty);
       }
 
-      gsl_matrix_free(W);
-      gsl_vector_free(y);
-      gsl_matrix_free(UtX);
+      gsl_matrix_safe_free(W);
+      gsl_vector_safe_free(y);
+      gsl_matrix_safe_free(UtX);
     } else if (cPar.a_mode == 15) {
       // perform EM algorithm and estimate parameters
       vector<string> vec_rs;
@@ -3045,9 +3045,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
       }
 
       // load annotations
-      gsl_matrix *Ac;
-      gsl_matrix_int *Ad;
-      gsl_vector_int *dlevel;
+      gsl_matrix *Ac = NULL;
+      gsl_matrix_int *Ad = NULL;
+      gsl_vector_int *dlevel = NULL;
       size_t kc, kd;
       if (!cPar.file_cat.empty()) {
         ReadFile_cat(cPar.file_cat, vec_rs, Ac, Ad, dlevel, kc, kd);
@@ -3057,7 +3057,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       }
 
       cout << "## number of blocks = " << BF.size() << endl;
-      cout << "## number of analyzed SNPs = " << vec_rs.size() << endl;
+      cout << "## number of analyzed SNPs/var = " << vec_rs.size() << endl;
       cout << "## grid size for hyperparameters = " << wab.size() << endl;
       cout << "## number of continuous annotations = " << kc << endl;
       cout << "## number of discrete annotations = " << kd << endl;
@@ -3077,7 +3077,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.time_opt = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
       cBslmmDap.CopyToParam(cPar);
 
-      gsl_matrix_free(Ac);
+      gsl_matrix_safe_free(Ac);
       gsl_matrix_int_free(Ad);
       gsl_vector_int_free(dlevel);
     } else {
@@ -3090,6 +3090,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
   return;
 }
 
+#include "Eigen/Dense"
+
 void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
   string file_str;
   file_str = cPar.path_out + "/" + cPar.file_out;
@@ -3102,9 +3104,21 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
   }
 
   outfile << "##" << endl;
-  outfile << "## GEMMA Version = " << version << endl;
-  outfile << "## GSL Version   = " << GSL_VERSION << endl;
-  outfile << "## Eigen Version = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
+  outfile << "## GEMMA Version    = " << version << " (" << date << ")" << endl;
+  outfile << "## GSL Version      = " << GSL_VERSION << endl;
+  outfile << "## Eigen Version    = " << EIGEN_WORLD_VERSION << "." << EIGEN_MAJOR_VERSION << "." << EIGEN_MINOR_VERSION << endl;
+#ifdef OPENBLAS
+
+  #ifndef OPENBLAS_LEGACY
+  outfile << "## OpenBlas         =" << OPENBLAS_VERSION << " - " << openblas_get_config() << endl;
+  outfile << "##   arch           = " << openblas_get_corename() << endl;
+  outfile << "##   threads        = " << openblas_get_num_threads() << endl;
+  #else
+  outfile << "## OpenBlas         = " << openblas_get_config() << endl;
+  #endif
+  string* pStr = new string[4] { "sequential", "threaded", "openmp" };
+  outfile << "##   parallel type  = " << pStr[openblas_get_parallel()] << endl;
+#endif
 
   outfile << "##" << endl;
   outfile << "## Command Line Input = ";
@@ -3119,7 +3133,6 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
   tm *ptm = localtime(&rawtime);
 
   outfile << "## Date = " << asctime(ptm);
-  // ptm->tm_year<<":"<<ptm->tm_month<<":"<<ptm->tm_day":"<<ptm->tm_hour<<":"<<ptm->tm_min<<endl;
 
   outfile << "##" << endl;
   outfile << "## Summary Statistics:" << endl;
@@ -3129,11 +3142,6 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
             << cPar.ni_study << endl;
     outfile << "## number of total individuals in the reference = "
             << cPar.ni_ref << endl;
-    // outfile<<"## number of total SNPs in the sample = "<<cPar.ns_study<<endl;
-    // outfile<<"## number of total SNPs in the reference panel =
-    // "<<cPar.ns_ref<<endl;
-    // outfile<<"## number of analyzed SNPs = "<<cPar.ns_test<<endl;
-    // outfile<<"## number of analyzed SNP pairs = "<<cPar.ns_pair<<endl;
     outfile << "## number of variance components = " << cPar.n_vc << endl;
 
     outfile << "## pve estimates = ";
@@ -3183,11 +3191,11 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
             << cPar.ni_study << endl;
     outfile << "## number of total individuals in the reference = "
             << cPar.ni_total << endl;
-    outfile << "## number of total SNPs in the sample = " << cPar.ns_study
+    outfile << "## number of total SNPs/var in the sample = " << cPar.ns_study
             << endl;
-    outfile << "## number of total SNPs in the reference panel = "
+    outfile << "## number of total SNPs/var in the reference panel = "
             << cPar.ns_total << endl;
-    outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
     outfile << "## number of variance components = " << cPar.n_vc << endl;
   } else if (!cPar.file_beta.empty() &&
              (cPar.a_mode == 66 || cPar.a_mode == 67)) {
@@ -3195,9 +3203,9 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
             << cPar.ni_total << endl;
     outfile << "## number of total individuals in the reference = "
             << cPar.ni_ref << endl;
-    outfile << "## number of total SNPs in the sample = " << cPar.ns_total
+    outfile << "## number of total SNPs/var in the sample = " << cPar.ns_total
             << endl;
-    outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+    outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
     outfile << "## number of variance components = " << cPar.n_vc << endl;
 
     outfile << "## pve estimates = ";
@@ -3267,10 +3275,10 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
       outfile << "## number of total genes = " << cPar.ng_total << endl;
       outfile << "## number of analyzed genes = " << cPar.ng_test << endl;
     } else if (cPar.file_epm.empty()) {
-      outfile << "## number of total SNPs = " << cPar.ns_total << endl;
-      outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+      outfile << "## number of total SNPs/var = " << cPar.ns_total << endl;
+      outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
     } else {
-      outfile << "## number of analyzed SNPs = " << cPar.ns_test << endl;
+      outfile << "## number of analyzed SNPs/var = " << cPar.ns_test << endl;
     }
 
     if (cPar.a_mode == 13) {
diff --git a/src/gemma.h b/src/gemma.h
index cd1683a..4deab51 100644
--- a/src/gemma.h
+++ b/src/gemma.h
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
diff --git a/src/io.cpp b/src/io.cpp
index 1dc5642..d20b473 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -40,7 +42,8 @@
 #include "gsl/gsl_vector.h"
 
 #include "debug.h"
-#include "eigenlib.h"
+// #include "eigenlib.h"
+#include "fastblas.h"
 #include "gzstream.h"
 #include "io.h"
 #include "lapack.h"
@@ -49,43 +52,24 @@
 using namespace std;
 
 // Print progress bar.
-void ProgressBar(string str, double p, double total) {
-  double progress = (100.0 * p / total);
-  int barsize = (int)(progress / 2.0);
-  char bar[51];
-
-  cout << str;
-  for (int i = 0; i < 50; i++) {
-    if (i < barsize) {
-      bar[i] = '=';
-    } else {
-      bar[i] = ' ';
-    }
-    cout << bar[i];
-  }
-  cout << setprecision(2) << fixed << progress << "%\r" << flush;
-
-  return;
-}
-
-// Print progress bar with acceptance ratio.
 void ProgressBar(string str, double p, double total, double ratio) {
-  double progress = (100.0 * p / total);
-  int barsize = (int)(progress / 2.0);
-  char bar[51];
-
-  cout << str;
-  for (int i = 0; i < 50; i++) {
-    if (i < barsize) {
-      bar[i] = '=';
-    } else {
-      bar[i] = ' ';
-    }
-    cout << bar[i];
-  }
-  cout << setprecision(2) << fixed << progress << "%    " << ratio << "\r"
-       << flush;
-  return;
+  assert(p<=total);
+  assert(p>=0);
+  if (total <= 0.0) return;
+  const double progress = (100.0 * p / total);
+  const uint barsize = (int)(progress / 2.0); // characters
+  // cout << barsize << endl;
+  // cout << str << " ";
+  // cout << p << "/" << total << endl;
+  assert(barsize < 101); // corrupted data somehow
+  if (barsize > 0) {
+    cout << std::string(barsize,'=');
+  }
+  cout << std::string(50-barsize,' ');
+  cout << setprecision(0) << fixed << " " << progress << "%";
+  if (ratio != -1.0)
+    cout << setprecision(2) << "    " << ratio;
+  cout << "\r" << flush;
 }
 
 bool isBlankLine(char const *line) {
@@ -177,7 +161,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   if (header.rs_col == 0 && (header.chr_col == 0 || header.pos_col == 0)) {
@@ -233,7 +217,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
   size_t flag = 0;
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     ch_ptr = strtok(NULL, " , \t");
 
     if (ch_ptr != NULL && strcmp(ch_ptr, "estimated") == 0) {
@@ -241,7 +225,7 @@ bool ReadFile_log(const string &file_log, double &pheno_mean) {
       if (ch_ptr != NULL && strcmp(ch_ptr, "mean") == 0) {
         ch_ptr = strtok(NULL, " , \t");
         if (ch_ptr != NULL && strcmp(ch_ptr, "=") == 0) {
-          ch_ptr = strtok(NULL, " , \t");
+          ch_ptr = strtok_safe(NULL, " , \t");
           pheno_mean = atof(ch_ptr);
           flag = 1;
         }
@@ -339,7 +323,7 @@ bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
   string id;
   double p;
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (int i = 0; i < (p_column - 1); ++i) {
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -511,17 +495,17 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
   string minor;
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " \t");
     chr = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     rs = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     cM = atof(ch_ptr);
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     b_pos = atol(ch_ptr);
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     minor = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     major = ch_ptr;
 
     SNPINFO sInfo = {chr, rs, cM, b_pos, minor, major, 0, -9, -9, 0, 0, 0};
@@ -567,12 +551,12 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
   }
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     id = ch_ptr;
-    ch_ptr = strtok(NULL, " \t");
-    ch_ptr = strtok(NULL, " \t");
-    ch_ptr = strtok(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
+    ch_ptr = strtok_safe(NULL, " \t");
     ch_ptr = strtok(NULL, " \t");
 
     size_t i = 0;
@@ -620,7 +604,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
                    const double &r2_level, map<string, string> &mapRS2chr,
                    map<string, long int> &mapRS2bp,
                    map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
-                   size_t &ns_test, bool debug) {
+                   size_t &ns_test) {
   debug_msg("entered");
   indicator_snp.clear();
   snpInfo.clear();
@@ -631,12 +615,12 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     return false;
   }
 
-  gsl_vector *genotype = gsl_vector_alloc(W->size1);
-  gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
-  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_vector *genotype = gsl_vector_safe_alloc(W->size1);
+  gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
   gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -674,11 +658,11 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   file_pos = 0;
   auto count_warnings = 0;
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     minor = ch_ptr;
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     major = ch_ptr;
 
     if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
@@ -693,7 +677,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     }
 
     if (mapRS2bp.count(rs) == 0) {
-      if (debug && count_warnings++ < 10) {
+      if (is_debug_mode() && count_warnings++ < 10) {
         std::string msg = "Can't figure out position for ";
         msg += rs;
         debug_msg(msg);
@@ -719,7 +703,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     c_idv = 0;
     gsl_vector_set_zero(genotype_miss);
     for (int i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0)
         continue;
 
@@ -842,12 +826,12 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
     return false;
   }
 
-  gsl_vector *genotype = gsl_vector_alloc(W->size1);
-  gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
-  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_vector *genotype = gsl_vector_safe_alloc(W->size1);
+  gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
   gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -1029,13 +1013,13 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
   bool flag = false;
 
   for (size_t i = 0; i < inc; i++) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
   }
 
   if (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     geno_mean = 0.0;
     double d;
@@ -1043,7 +1027,7 @@ bool Bimbam_ReadOneSNP(const size_t inc, const vector<int> &indicator_idv,
     vector<size_t> geno_miss;
 
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -1159,9 +1143,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
     size_t i_test = 0, i_total = 0, j_test = 0, j_total = 0;
     while (getline(infile, line)) {
       if (i_total == ni_total) {
-        cout << "error! number of rows in the kinship "
-             << "file is larger than the number of phentypes." << endl;
-        error = true;
+        fail_msg("number of rows in the kinship file is larger than the number of phentypes");
       }
 
       if (indicator_idv[i_total] == 0) {
@@ -1174,10 +1156,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
       ch_ptr = strtok((char *)line.c_str(), " , \t");
       while (ch_ptr != NULL) {
         if (j_total == ni_total) {
-          cout << "error! number of columns in the "
-               << "kinship file is larger than the number"
-               << " of phenotypes for row = " << i_total << endl;
-          error = true;
+          fail_msg(string("number of columns in the kinship file is larger than the number of individuals for row = ")+to_string(i_total));
         }
 
         d = atof(ch_ptr);
@@ -1190,18 +1169,14 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
         ch_ptr = strtok(NULL, " , \t");
       }
       if (j_total != ni_total) {
-        cout << "error! number of columns in the kinship "
-             << "file do not match the number of phentypes for "
-             << "row = " << i_total << endl;
-        error = true;
+        string msg = "number of columns in the kinship file does not match the number of individuals for row = " + to_string( i_total );
+        fail_msg(msg);
       }
       i_total++;
       i_test++;
     }
     if (i_total != ni_total) {
-      cout << "error! number of rows in the kinship file do "
-           << "not match the number of phenotypes." << endl;
-      error = true;
+      fail_msg("number of rows in the kinship file does not match the number of individuals.");
     }
   } else {
     map<size_t, size_t> mapID2ID;
@@ -1218,11 +1193,11 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
     size_t n_id1, n_id2;
 
     while (getline(infile, line)) {
-      ch_ptr = strtok((char *)line.c_str(), " , \t");
+      ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
       id1 = ch_ptr;
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       id2 = ch_ptr;
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       d = atof(ch_ptr);
       if (mapID2num.count(id1) == 0 || mapID2num.count(id2) == 0) {
         continue;
@@ -1237,9 +1212,10 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
 
       Cov_d = gsl_matrix_get(G, n_id1, n_id2);
       if (Cov_d != 0 && Cov_d != d) {
-        cout << "error! redundant and unequal terms in the "
+        cerr << "error! redundant and unequal terms in the "
              << "kinship file, for id1 = " << id1 << " and id2 = " << id2
              << endl;
+        fail_msg("");
       } else {
         gsl_matrix_set(G, n_id1, n_id2, d);
         gsl_matrix_set(G, n_id2, n_id1, d);
@@ -1278,7 +1254,6 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
 
   infile.close();
   infile.clear();
-  return;
 }
 
 void ReadFile_eigenU(const string &file_ku, bool &error, gsl_matrix *U) {
@@ -1354,7 +1329,7 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
       error = true;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     d = atof(ch_ptr);
 
     ch_ptr = strtok(NULL, " , \t");
@@ -1391,12 +1366,12 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
   bool process_ksnps = ksnps.size();
 
   size_t ni_total = matrix_kin->size1;
-  gsl_vector *geno = gsl_vector_alloc(ni_total);
-  gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
+  gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
+  gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total);
 
   // Xlarge contains inds x markers
   const size_t msize = K_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_total, msize);
   enforce_msg(Xlarge, "allocate Xlarge");
 
   gsl_matrix_set_zero(Xlarge);
@@ -1405,9 +1380,9 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
   size_t ns_test = 0;
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
     string line;
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
-      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+      ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
     }
     if (indicator_snp[t] == 0)
       continue;
@@ -1421,7 +1396,12 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
       uint token_num = 0;
       for (auto x = tokens; x != rend; x++)
         token_num++;
-      enforce_str(token_num == ni_total + 3, line + " count fields");
+      if (token_num != ni_total+3) {
+        cerr << line << endl;
+        cerr << token_num << " != " << ni_total << endl;
+        warning_msg("Columns in geno file do not match # individuals");
+      }
+      enforce_msg(token_num <= ni_total + 3,"not enough genotype fields");
     }
 
     auto snp = *tokens; // first field
@@ -1480,12 +1460,12 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
 
     // compute kinship matrix and return in matrix_kin a SNP at a time
     if (ns_test % msize == 0) {
-      eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+      fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
       gsl_matrix_set_zero(Xlarge);
     }
   }
   if (ns_test % msize != 0) {
-    eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+    fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
   }
   cout << endl;
 
@@ -1531,14 +1511,14 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
   double d, geno_mean, geno_var;
 
   size_t ni_total = matrix_kin->size1;
-  gsl_vector *geno = gsl_vector_alloc(ni_total);
+  gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
 
   size_t ns_test = 0;
   int n_bit;
 
   // Create a large matrix.
   const size_t msize = K_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_total, msize);
   gsl_matrix_set_zero(Xlarge);
 
   // Calculate n_bit and c, the number of bit for each snp.
@@ -1556,7 +1536,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
 
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
-      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+      ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
@@ -1626,13 +1606,13 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
     ns_test++;
 
     if (ns_test % msize == 0) {
-      eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+      fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
       gsl_matrix_set_zero(Xlarge);
     }
   }
 
   if (ns_test % msize != 0) {
-    eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+    fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
   }
 
   cout << endl;
@@ -1659,7 +1639,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
 // genotype and calculate K.
 bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
-                   const bool calc_K, bool debug) {
+                   const bool calc_K) {
   debug_msg("entered");
   igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
@@ -1674,8 +1654,8 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
     gsl_matrix_set_zero(K);
   }
 
-  gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
-  gsl_vector *genotype_miss = gsl_vector_alloc(UtX->size1);
+  gsl_vector *genotype = gsl_vector_safe_alloc(UtX->size1);
+  gsl_vector *genotype_miss = gsl_vector_safe_alloc(UtX->size1);
   double geno, geno_mean;
   size_t n_miss;
 
@@ -1687,21 +1667,21 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
   int c_idv = 0, c_snp = 0;
 
   for (int i = 0; i < ns_total; ++i) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (indicator_snp[i] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     c_idv = 0;
     geno_mean = 0;
     n_miss = 0;
     gsl_vector_set_zero(genotype_miss);
     for (int j = 0; j < ni_total; ++j) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[j] == 0) {
         continue;
       }
@@ -1764,7 +1744,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp,
                    vector<vector<unsigned char>> &Xt, gsl_matrix *K,
                    const bool calc_K, const size_t ni_test,
-                   const size_t ns_test, bool debug) {
+                   const size_t ns_test) {
   debug_msg("entered");
   igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
@@ -1785,8 +1765,8 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
     gsl_matrix_set_zero(K);
   }
 
-  gsl_vector *genotype = gsl_vector_alloc(ni_test);
-  gsl_vector *genotype_miss = gsl_vector_alloc(ni_test);
+  gsl_vector *genotype = gsl_vector_safe_alloc(ni_test);
+  gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test);
   double geno, geno_mean;
   size_t n_miss;
 
@@ -1796,21 +1776,21 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
   size_t c_idv = 0, c_snp = 0;
 
   for (size_t i = 0; i < ns_total; ++i) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (indicator_snp[i] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     c_idv = 0;
     geno_mean = 0;
     n_miss = 0;
     gsl_vector_set_zero(genotype_miss);
     for (uint j = 0; j < ni_total; ++j) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[j] == 0) {
         continue;
       }
@@ -1904,7 +1884,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
     gsl_matrix_set_zero(K);
   }
 
-  gsl_vector *genotype = gsl_vector_alloc(UtX->size1);
+  gsl_vector *genotype = gsl_vector_safe_alloc(UtX->size1);
 
   double geno, geno_mean;
   size_t n_miss;
@@ -2040,7 +2020,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
     gsl_matrix_set_zero(K);
   }
 
-  gsl_vector *genotype = gsl_vector_alloc(ni_test);
+  gsl_vector *genotype = gsl_vector_safe_alloc(ni_test);
 
   double geno, geno_mean;
   size_t n_miss;
@@ -2160,22 +2140,26 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
   size_t n = *max_element(est_column.begin(), est_column.end());
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " \t");
 
     alpha = 0.0;
     beta = 0.0;
     gamma = 1.0;
     for (size_t i = 0; i < n + 1; ++i) {
       if (i == est_column[0] - 1) {
+        enforce(ch_ptr);
         rs = ch_ptr;
       }
       if (i == est_column[1] - 1) {
+        enforce(ch_ptr);
         alpha = atof(ch_ptr);
       }
       if (i == est_column[2] - 1) {
+        enforce(ch_ptr);
         beta = atof(ch_ptr);
       }
       if (i == est_column[3] - 1) {
+        enforce(ch_ptr);
         gamma = atof(ch_ptr);
       }
       if (i < n) {
@@ -2237,7 +2221,7 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
   getline(infile, line);
 
   while (getline(infile, line)) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
 
     ch_ptr = strtok(NULL, " , \t");
@@ -2274,759 +2258,6 @@ bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
   return true;
 }
 
-// WJA Added
-// Read Oxford sample file.
-bool ReadFile_sample(const string &file_sample,
-                     vector<vector<int>> &indicator_pheno,
-                     vector<vector<double>> &pheno,
-                     const vector<size_t> &p_column, vector<int> &indicator_cvt,
-                     vector<vector<double>> &cvt, size_t &n_cvt) {
-  debug_msg("entered");
-  indicator_pheno.clear();
-  pheno.clear();
-  indicator_cvt.clear();
-
-  igzstream infile(file_sample.c_str(), igzstream::in);
-
-  if (!infile) {
-    cout << "error! fail to open sample file: " << file_sample << endl;
-    return false;
-  }
-
-  string line;
-  char *ch_ptr;
-
-  string id;
-  double p, d;
-
-  vector<double> pheno_row;
-  vector<int> ind_pheno_row;
-  int flag_na = 0;
-
-  size_t num_cols = 0;
-  size_t num_p_in_file = 0;
-  size_t num_cvt_in_file = 0;
-
-  map<size_t, size_t> mapP2c;
-  for (size_t i = 0; i < p_column.size(); i++) {
-    mapP2c[p_column[i]] = i;
-    pheno_row.push_back(-9);
-    ind_pheno_row.push_back(0);
-  }
-
-  // Read header line1.
-  if (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
-    if (strcmp(ch_ptr, "ID_1") != 0) {
-      return false;
-    }
-    ch_ptr = strtok(NULL, " \t");
-    if (strcmp(ch_ptr, "ID_2") != 0) {
-      return false;
-    }
-    ch_ptr = strtok(NULL, " \t");
-    if (strcmp(ch_ptr, "missing") != 0) {
-      return false;
-    }
-    while (ch_ptr != NULL) {
-      num_cols++;
-      ch_ptr = strtok(NULL, " \t");
-    }
-    num_cols--;
-  }
-
-  vector<map<uint32_t, size_t>> cvt_factor_levels;
-
-  char col_type[num_cols];
-
-  // Read header line2.
-  if (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " \t");
-    if (strcmp(ch_ptr, "0") != 0) {
-      return false;
-    }
-    ch_ptr = strtok(NULL, " \t");
-    if (strcmp(ch_ptr, "0") != 0) {
-      return false;
-    }
-    ch_ptr = strtok(NULL, " \t");
-    if (strcmp(ch_ptr, "0") != 0) {
-      return false;
-    }
-    size_t it = 0;
-    ch_ptr = strtok(NULL, " \t");
-    if (ch_ptr != NULL)
-      while (ch_ptr != NULL) {
-        col_type[it++] = ch_ptr[0];
-        if (ch_ptr[0] == 'D') {
-          cvt_factor_levels.push_back(map<uint32_t, size_t>());
-          num_cvt_in_file++;
-        }
-        if (ch_ptr[0] == 'C') {
-          num_cvt_in_file++;
-        }
-        if ((ch_ptr[0] == 'P') || (ch_ptr[0] == 'B')) {
-          num_p_in_file++;
-        }
-        ch_ptr = strtok(NULL, " \t");
-      }
-  }
-
-  while (!safeGetline(infile, line).eof()) {
-
-    ch_ptr = strtok((char *)line.c_str(), " \t");
-
-    for (int it = 0; it < 3; it++) {
-      ch_ptr = strtok(NULL, " \t");
-    }
-
-    size_t i = 0;
-    size_t p_i = 0;
-    size_t fac_cvt_i = 0;
-
-    while (i < num_cols) {
-
-      if ((col_type[i] == 'P') || (col_type[i] == 'B')) {
-        if (mapP2c.count(p_i + 1) != 0) {
-          if (strcmp(ch_ptr, "NA") == 0) {
-            ind_pheno_row[mapP2c[p_i + 1]] = 0;
-            pheno_row[mapP2c[p_i + 1]] = -9;
-          } else {
-            p = atof(ch_ptr);
-            ind_pheno_row[mapP2c[p_i + 1]] = 1;
-            pheno_row[mapP2c[p_i + 1]] = p;
-          }
-        }
-        p_i++;
-      }
-      if (col_type[i] == 'D') {
-
-        // NOTE THIS DOES NOT CHECK TO BE SURE LEVEL
-        // IS INTEGRAL i.e for atoi error.
-        if (strcmp(ch_ptr, "NA") != 0) {
-          uint32_t level = atoi(ch_ptr);
-          if (cvt_factor_levels[fac_cvt_i].count(level) == 0) {
-            cvt_factor_levels[fac_cvt_i][level] =
-                cvt_factor_levels[fac_cvt_i].size();
-          }
-        }
-        fac_cvt_i++;
-      }
-
-      ch_ptr = strtok(NULL, " \t");
-      i++;
-    }
-
-    indicator_pheno.push_back(ind_pheno_row);
-    pheno.push_back(pheno_row);
-  }
-
-  // Close and reopen the file.
-  infile.close();
-  infile.clear();
-
-  if (num_cvt_in_file > 0) {
-    igzstream infile2(file_sample.c_str(), igzstream::in);
-
-    if (!infile2) {
-      cout << "error! fail to open sample file: " << file_sample << endl;
-      return false;
-    }
-
-    // Skip header.
-    safeGetline(infile2, line);
-    safeGetline(infile2, line);
-
-    // Pull in the covariates now we now the number of
-    // factor levels.
-    while (!safeGetline(infile2, line).eof()) {
-
-      vector<double> v_d;
-      flag_na = 0;
-      ch_ptr = strtok((char *)line.c_str(), " \t");
-
-      for (int it = 0; it < 3; it++) {
-        ch_ptr = strtok(NULL, " \t");
-      }
-
-      size_t i = 0;
-      size_t fac_cvt_i = 0;
-      size_t num_fac_levels;
-      while (i < num_cols) {
-
-        if (col_type[i] == 'C') {
-          if (strcmp(ch_ptr, "NA") == 0) {
-            flag_na = 1;
-            d = -9;
-          } else {
-            d = atof(ch_ptr);
-          }
-
-          v_d.push_back(d);
-        }
-
-        if (col_type[i] == 'D') {
-
-          // NOTE THIS DOES NOT CHECK TO BE SURE
-          // LEVEL IS INTEGRAL i.e for atoi error.
-          num_fac_levels = cvt_factor_levels[fac_cvt_i].size();
-          if (num_fac_levels > 1) {
-            if (strcmp(ch_ptr, "NA") == 0) {
-              flag_na = 1;
-              for (size_t it = 0; it < num_fac_levels - 1; it++) {
-                v_d.push_back(-9);
-              }
-            } else {
-              uint32_t level = atoi(ch_ptr);
-              for (size_t it = 0; it < num_fac_levels - 1; it++) {
-                cvt_factor_levels[fac_cvt_i][level] == it + 1
-                    ? v_d.push_back(1.0)
-                    : v_d.push_back(0.0);
-              }
-            }
-          }
-          fac_cvt_i++;
-        }
-
-        ch_ptr = strtok(NULL, " \t");
-        i++;
-      }
-
-      if (flag_na == 0) {
-        indicator_cvt.push_back(1);
-      } else {
-        indicator_cvt.push_back(0);
-      }
-      cvt.push_back(v_d);
-    }
-
-    if (indicator_cvt.empty()) {
-      n_cvt = 0;
-    } else {
-      flag_na = 0;
-      for (vector<int>::size_type i = 0; i < indicator_cvt.size(); ++i) {
-        if (indicator_cvt[i] == 0) {
-          continue;
-        }
-
-        if (flag_na == 0) {
-          flag_na = 1;
-          n_cvt = cvt[i].size();
-        }
-        if (flag_na != 0 && n_cvt != cvt[i].size()) {
-          cout << "error! number of covariates in row " << i
-               << " do not match other rows." << endl;
-          return false;
-        }
-      }
-    }
-
-    infile2.close();
-    infile2.clear();
-  }
-  return true;
-}
-
-// WJA Added.
-// Read bgen file, the first time.
-bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
-                   const gsl_matrix *W, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
-                   const double &maf_level, const double &miss_level,
-                   const double &hwe_level, const double &r2_level,
-                   size_t &ns_test) {
-
-  debug_msg("entered");
-  indicator_snp.clear();
-
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return false;
-  }
-
-  gsl_vector *genotype = gsl_vector_alloc(W->size1);
-  gsl_vector *genotype_miss = gsl_vector_alloc(W->size1);
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
-  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
-  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
-
-  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-  int sig;
-  LUDecomp(WtW, pmt, &sig);
-  LUInvert(WtW, pmt, WtWi);
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-  bool LongIds = bgen_flags & 0x4;
-
-  if (!LongIds) {
-    return false;
-  }
-
-  infile.ignore(bgen_snp_block_offset);
-
-  ns_test = 0;
-
-  size_t ns_total = static_cast<size_t>(bgen_nsnps);
-
-  snpInfo.clear();
-  string rs;
-  long int b_pos;
-  string chr;
-  string major;
-  string minor;
-  string id;
-
-  double v_x, v_w;
-  int c_idv = 0;
-
-  double maf, geno, geno_old;
-  size_t n_miss;
-  size_t n_0, n_1, n_2;
-  int flag_poly;
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB;
-  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
-  // Total number of samples in phenotype file.
-  size_t ni_total = indicator_idv.size();
-
-  // Number of samples to use in test.
-  size_t ni_test = 0;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-
-  for (size_t i = 0; i < ni_total; ++i) {
-    ni_test += indicator_idv[i];
-  }
-
-  for (size_t t = 0; t < ns_total; ++t) {
-
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    // Should we switch according to MAF?
-    minor = bgen_B_allele;
-    major = bgen_A_allele;
-    b_pos = static_cast<long int>(bgen_SNP_pos);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
-      SNPINFO sInfo = {
-          "-9", rs,          -9, -9, minor, major, static_cast<size_t>(-9),
-          -9,   (long int)-9};
-
-      snpInfo.push_back(sInfo);
-      indicator_snp.push_back(0);
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    maf = 0;
-    n_miss = 0;
-    flag_poly = 0;
-    geno_old = -9;
-    n_0 = 0;
-    n_1 = 0;
-    n_2 = 0;
-    c_idv = 0;
-    gsl_vector_set_zero(genotype_miss);
-    for (size_t i = 0; i < bgen_N; ++i) {
-
-      // CHECK this set correctly!
-      if (indicator_idv[i] == 0) {
-        continue;
-      }
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-
-      // CHECK 0.1 OK.
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(genotype_miss, c_idv, 1);
-        n_miss++;
-        c_idv++;
-        continue;
-      }
-
-      bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-      bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-      bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-      geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-      if (geno >= 0 && geno <= 0.5) {
-        n_0++;
-      }
-      if (geno > 0.5 && geno < 1.5) {
-        n_1++;
-      }
-      if (geno >= 1.5 && geno <= 2.0) {
-        n_2++;
-      }
-
-      gsl_vector_set(genotype, c_idv, geno);
-
-      // CHECK WHAT THIS DOES.
-      if (flag_poly == 0) {
-        geno_old = geno;
-        flag_poly = 2;
-      }
-      if (flag_poly == 2 && geno != geno_old) {
-        flag_poly = 1;
-      }
-
-      maf += geno;
-
-      c_idv++;
-    }
-
-    maf /= 2.0 * static_cast<double>(ni_test - n_miss);
-
-    SNPINFO sInfo = {chr,   rs,    -9,     b_pos,
-                     minor, major, n_miss, (double)n_miss / (double)ni_test,
-                     maf};
-    snpInfo.push_back(sInfo);
-
-    if ((double)n_miss / (double)ni_test > miss_level) {
-      indicator_snp.push_back(0);
-      continue;
-    }
-
-    if ((maf < maf_level || maf > (1.0 - maf_level)) && maf_level != -1) {
-      indicator_snp.push_back(0);
-      continue;
-    }
-
-    if (flag_poly != 1) {
-      indicator_snp.push_back(0);
-      continue;
-    }
-
-    if (hwe_level != 0 && maf_level != -1) {
-      if (CalcHWE(n_0, n_2, n_1) < hwe_level) {
-        indicator_snp.push_back(0);
-        continue;
-      }
-    }
-
-    // Filter SNP if it is correlated with W
-    // unless W has only one column, of 1s.
-    for (size_t i = 0; i < genotype->size; ++i) {
-      if (gsl_vector_get(genotype_miss, i) == 1) {
-        geno = maf * 2.0;
-        gsl_vector_set(genotype, i, geno);
-      }
-    }
-
-    gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
-    gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
-    gsl_blas_ddot(genotype, genotype, &v_x);
-    gsl_blas_ddot(Wtx, WtWiWtx, &v_w);
-
-    if (W->size2 != 1 && v_w / v_x >= r2_level) {
-      indicator_snp.push_back(0);
-      continue;
-    }
-
-    indicator_snp.push_back(1);
-    ns_test++;
-  }
-
-  return true;
-}
-
-// Read oxford genotype file and calculate kinship matrix.
-bool bgenKin(const string &file_oxford, vector<int> &indicator_snp,
-             const int k_mode, const int display_pace, gsl_matrix *matrix_kin) {
-  debug_msg("entered");
-  string file_bgen = file_oxford;
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return false;
-  }
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-
-  infile.ignore(bgen_snp_block_offset);
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB;
-  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-  string id;
-  string rs;
-  string chr;
-  double genotype;
-
-  size_t n_miss;
-  double d, geno_mean, geno_var;
-
-  size_t ni_total = matrix_kin->size1;
-  gsl_vector *geno = gsl_vector_alloc(ni_total);
-  gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
-
-  size_t ns_test = 0;
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-
-    if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
-      ProgressBar("Reading bgen SNPs  ", t, indicator_snp.size() - 1);
-    }
-
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (indicator_snp[t] == 0) {
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    geno_mean = 0.0;
-    n_miss = 0;
-    geno_var = 0.0;
-    gsl_vector_set_all(geno_miss, 0);
-
-    for (size_t i = 0; i < bgen_N; ++i) {
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-      // WJA
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(geno_miss, i, 0.0);
-        n_miss++;
-      } else {
-
-        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-        genotype = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
-        gsl_vector_set(geno, i, genotype);
-        gsl_vector_set(geno_miss, i, 1.0);
-        geno_mean += genotype;
-        geno_var += genotype * genotype;
-      }
-    }
-
-    geno_mean /= (double)(ni_total - n_miss);
-    geno_var += geno_mean * geno_mean * (double)n_miss;
-    geno_var /= (double)ni_total;
-    geno_var -= geno_mean * geno_mean;
-
-    for (size_t i = 0; i < ni_total; ++i) {
-      if (gsl_vector_get(geno_miss, i) == 0) {
-        gsl_vector_set(geno, i, geno_mean);
-      }
-    }
-
-    gsl_vector_add_constant(geno, -1.0 * geno_mean);
-
-    if (geno_var != 0) {
-      if (k_mode == 1) {
-        gsl_blas_dsyr(CblasUpper, 1.0, geno, matrix_kin);
-      } else if (k_mode == 2) {
-        gsl_blas_dsyr(CblasUpper, 1.0 / geno_var, geno, matrix_kin);
-      } else {
-        cout << "Unknown kinship mode." << endl;
-      }
-    }
-
-    ns_test++;
-  }
-  cout << endl;
-
-  gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
-
-  for (size_t i = 0; i < ni_total; ++i) {
-    for (size_t j = 0; j < i; ++j) {
-      d = gsl_matrix_get(matrix_kin, j, i);
-      gsl_matrix_set(matrix_kin, i, j, d);
-    }
-  }
-
-  gsl_vector_free(geno);
-  gsl_vector_free(geno_miss);
-
-  infile.close();
-  infile.clear();
-
-  return true;
-}
-
 // Read header to determine which column contains which item.
 bool ReadHeader_io(const string &line, HEADER &header) {
   debug_msg("entered");
@@ -3314,7 +2545,7 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   // Use the header to count the number of categories.
@@ -3340,10 +2571,11 @@ bool ReadFile_cat(const string &file_cat, map<string, size_t> &mapRS2cat,
 
   // Read the following lines to record mapRS2cat.
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     i_cat = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       } else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -3436,13 +2668,13 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
   double d, geno_mean, geno_var;
 
   size_t ni_test = matrix_kin->size1;
-  gsl_vector *geno = gsl_vector_alloc(ni_test);
-  gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
+  gsl_vector *geno = gsl_vector_safe_alloc(ni_test);
+  gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_test);
 
-  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
   gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -3459,21 +2691,21 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
 
   // Create a large matrix.
   const size_t msize = K_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_test, msize * n_vc);
   gsl_matrix_set_zero(Xlarge);
 
   size_t ns_test = 0;
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
-      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+      ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
     }
     if (indicator_snp[t] == 0)
       continue;
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     rs = snpInfo[t].rs_number; // This line is new.
 
@@ -3487,7 +2719,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
       if (indicator_idv[i] == 0) {
         continue;
       }
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(geno_miss, i, 0);
         n_miss++;
@@ -3536,7 +2768,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
         ns_vec[0]++;
 
         if (ns_vec[0] % msize == 0) {
-          eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+          fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
           gsl_matrix_set_zero(Xlarge);
         }
       } else if (mapRS2cat.count(rs) != 0) {
@@ -3553,7 +2785,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
               gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
           gsl_matrix_view kin_sub = gsl_matrix_submatrix(
               matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
-          eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+          fast_eigen_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
                          &kin_sub.matrix);
 
           gsl_matrix_set_zero(&X_sub.matrix);
@@ -3569,7 +2801,7 @@ bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
           gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
       gsl_matrix_view kin_sub =
           gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
-      eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+      fast_eigen_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
                      &kin_sub.matrix);
     }
   }
@@ -3628,12 +2860,12 @@ bool PlinkKin(const string &file_bed, const int display_pace,
 
   size_t ni_test = matrix_kin->size1;
   size_t ni_total = indicator_idv.size();
-  gsl_vector *geno = gsl_vector_alloc(ni_test);
+  gsl_vector *geno = gsl_vector_safe_alloc(ni_test);
 
-  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *WtWiWtx = gsl_vector_alloc(W->size2);
+  gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
   gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
@@ -3653,7 +2885,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
 
   // Create a large matrix.
   const size_t msize = K_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_test, msize * n_vc);
   gsl_matrix_set_zero(Xlarge);
 
   // Calculate n_bit and c, the number of bit for each SNP.
@@ -3671,7 +2903,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
 
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
-      ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
+      ProgressBar("Reading SNPs", t, indicator_snp.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
@@ -3762,7 +2994,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
         ns_vec[0]++;
 
         if (ns_vec[0] % msize == 0) {
-          eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+          fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
           gsl_matrix_set_zero(Xlarge);
         }
       } else if (mapRS2cat.count(rs) != 0) {
@@ -3779,7 +3011,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
               gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
           gsl_matrix_view kin_sub = gsl_matrix_submatrix(
               matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
-          eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+          fast_eigen_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
                          &kin_sub.matrix);
 
           gsl_matrix_set_zero(&X_sub.matrix);
@@ -3795,7 +3027,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
           gsl_matrix_submatrix(Xlarge, 0, msize * i_vc, ni_test, msize);
       gsl_matrix_view kin_sub =
           gsl_matrix_submatrix(matrix_kin, 0, ni_test * i_vc, ni_test, ni_test);
-      eigenlib_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
+      fast_eigen_dgemm("N", "T", 1.0, &X_sub.matrix, &X_sub.matrix, 1.0,
                      &kin_sub.matrix);
     }
   }
@@ -3852,8 +3084,8 @@ bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
 
   string file_name;
 
-  gsl_matrix *kin_tmp = gsl_matrix_alloc(matrix_kin->size1, matrix_kin->size2);
-  gsl_vector *ns_tmp = gsl_vector_alloc(vector_ns->size);
+  gsl_matrix *kin_tmp = gsl_matrix_safe_alloc(matrix_kin->size1, matrix_kin->size2);
+  gsl_vector *ns_tmp = gsl_vector_safe_alloc(vector_ns->size);
 
   size_t l = 0;
   double d;
@@ -3929,9 +3161,9 @@ bool ReadFile_wsnp(const string &file_wsnp, map<string, double> &mapRS2weight) {
   double weight;
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
     weight = atof(ch_ptr);
     mapRS2weight[rs] = weight;
   }
@@ -3960,17 +3192,18 @@ bool ReadFile_wsnp(const string &file_wcat, const size_t n_vc,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   while (!safeGetline(infile, line).eof()) {
     if (isBlankLine(line)) {
       continue;
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     size_t t = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       } else if (header.chr_col != 0 && header.chr_col == i + 1) {
@@ -4046,13 +3279,12 @@ void ReadFile_beta(const string &file_beta,
   string type;
 
   string rs, chr, a1, a0, pos, cm;
-  double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, zsquare = 0,
-         af = 0, var_x = 0;
+  double z = 0, beta = 0, se_beta = 0, pvalue = 0, zsquare = 0; // af = 0;
   size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0;
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   if (header.n_col == 0) {
@@ -4074,21 +3306,22 @@ void ReadFile_beta(const string &file_beta,
     if (isBlankLine(line)) {
       continue;
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     z = 0;
     beta = 0;
     se_beta = 0;
-    chisq = 0;
+    auto chisq = 0.0;
     pvalue = 0;
     n_total = 0;
     n_mis = 0;
     n_obs = 0;
     n_case = 0;
     n_control = 0;
-    af = 0;
-    var_x = 0;
+    // af = 0;
+    // auto var_x = 0.0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -4118,7 +3351,7 @@ void ReadFile_beta(const string &file_beta,
         se_beta = atof(ch_ptr);
       }
       if (header.chisq_col != 0 && header.chisq_col == i + 1) {
-        chisq = atof(ch_ptr);
+         chisq = atof(ch_ptr);
       }
       if (header.p_col != 0 && header.p_col == i + 1) {
         pvalue = atof(ch_ptr);
@@ -4139,12 +3372,12 @@ void ReadFile_beta(const string &file_beta,
       if (header.ncontrol_col != 0 && header.ncontrol_col == i + 1) {
         n_control = atoi(ch_ptr);
       }
-      if (header.af_col != 0 && header.af_col == i + 1) {
-        af = atof(ch_ptr);
-      }
-      if (header.var_col != 0 && header.var_col == i + 1) {
-        var_x = atof(ch_ptr);
-      }
+      // if (header.af_col != 0 && header.af_col == i + 1) {
+      //   af = atof(ch_ptr);
+      // }
+      // if (header.var_col != 0 && header.var_col == i + 1) {
+      //   var_x = atof(ch_ptr);
+      // }
 
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -4177,9 +3410,9 @@ void ReadFile_beta(const string &file_beta,
     }
 
     // Obtain var_x.
-    if (header.var_col == 0 && header.af_col != 0) {
-      var_x = 2.0 * af * (1.0 - af);
-    }
+    // if (header.var_col == 0 && header.af_col != 0) {
+    //   var_x = 2.0 * af * (1.0 - af);
+    // }
 
     // If the SNP is also present in cor file, then do calculations.
     if ((mapRS2wA.size() == 0 || mapRS2wA.count(rs) != 0) &&
@@ -4228,13 +3461,13 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
   string type;
 
   string rs, chr, a1, a0, pos, cm;
-  double z = 0, beta = 0, se_beta = 0, chisq = 0, pvalue = 0, af = 0, var_x = 0;
+  double z = 0, beta = 0, se_beta = 0; // pvalue = 0, chisq=0, af = 0 , var_x = 0;
   size_t n_total = 0, n_mis = 0, n_obs = 0, n_case = 0, n_control = 0;
   size_t ni_total = 0, ns_total = 0, ns_test = 0;
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_io(line, header);
 
   if (header.n_col == 0) {
@@ -4255,21 +3488,22 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
     if (isBlankLine(line)) {
       continue;
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     z = 0;
     beta = 0;
     se_beta = 0;
-    chisq = 0;
-    pvalue = 0;
+    // chisq = 0;
+    // pvalue = 0;
     n_total = 0;
     n_mis = 0;
     n_obs = 0;
     n_case = 0;
     n_control = 0;
-    af = 0;
-    var_x = 0;
+    // af = 0;
+    // double var_x = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -4298,12 +3532,12 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
       if (header.sebeta_col != 0 && header.sebeta_col == i + 1) {
         se_beta = atof(ch_ptr);
       }
-      if (header.chisq_col != 0 && header.chisq_col == i + 1) {
-        chisq = atof(ch_ptr);
-      }
-      if (header.p_col != 0 && header.p_col == i + 1) {
-        pvalue = atof(ch_ptr);
-      }
+      // if (header.chisq_col != 0 && header.chisq_col == i + 1) {
+      //   chisq = atof(ch_ptr);
+      // }
+      // if (header.p_col != 0 && header.p_col == i + 1) {
+      //   pvalue = atof(ch_ptr);
+      // }
 
       if (header.n_col != 0 && header.n_col == i + 1) {
         n_total = atoi(ch_ptr);
@@ -4321,12 +3555,13 @@ void ReadFile_beta(const string &file_beta, const map<string, double> &mapRS2wA,
         n_control = atoi(ch_ptr);
       }
 
-      if (header.af_col != 0 && header.af_col == i + 1) {
-        af = atof(ch_ptr);
-      }
-      if (header.var_col != 0 && header.var_col == i + 1) {
-        var_x = atof(ch_ptr);
-      }
+      // if (header.af_col != 0 && header.af_col == i + 1) {
+      //   af = atof(ch_ptr);
+      // }
+
+      // if (header.var_col != 0 && header.var_col == i + 1) {
+      //   var_x = atof(ch_ptr);
+      // }
 
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -4540,8 +3775,8 @@ void ReadFile_vector(const string &file_vec, gsl_vector *vec) {
   char *ch_ptr;
 
   for (size_t i = 0; i < vec->size; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     gsl_vector_set(vec, i, atof(ch_ptr));
   }
 
@@ -4563,9 +3798,10 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat) {
   char *ch_ptr;
 
   for (size_t i = 0; i < mat->size1; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t j = 0; j < mat->size2; j++) {
+      enforce(ch_ptr);
       gsl_matrix_set(mat, i, j, atof(ch_ptr));
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -4590,18 +3826,20 @@ void ReadFile_matrix(const string &file_mat, gsl_matrix *mat1,
   char *ch_ptr;
 
   for (size_t i = 0; i < mat1->size1; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t j = 0; j < mat1->size2; j++) {
+      enforce(ch_ptr);
       gsl_matrix_set(mat1, i, j, atof(ch_ptr));
       ch_ptr = strtok(NULL, " , \t");
     }
   }
 
   for (size_t i = 0; i < mat2->size1; i++) {
-    !safeGetline(infile, line).eof();
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    safeGetline(infile, line).eof();
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t j = 0; j < mat2->size2; j++) {
+      enforce(ch_ptr);
       gsl_matrix_set(mat2, i, j, atof(ch_ptr));
       ch_ptr = strtok(NULL, " , \t");
     }
@@ -4621,7 +3859,7 @@ void ReadFile_study(const string &file_study, gsl_matrix *Vq_mat,
   string sfile = file_study + ".size.txt";
   string qfile = file_study + ".q.txt";
 
-  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+  gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
 
   ReadFile_matrix(Vqfile, Vq_mat);
   ReadFile_vector(sfile, s);
@@ -4646,7 +3884,7 @@ void ReadFile_ref(const string &file_ref, gsl_matrix *S_mat,
   string sfile = file_ref + ".size.txt";
   string Sfile = file_ref + ".S.txt";
 
-  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+  gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
 
   ReadFile_vector(sfile, s);
   ReadFile_matrix(Sfile, S_mat, Svar_mat);
@@ -4672,9 +3910,9 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq_mat,
   gsl_vector_set_zero(s_vec);
   ni = 0;
 
-  gsl_matrix *Vq_sub = gsl_matrix_alloc(Vq_mat->size1, Vq_mat->size2);
-  gsl_vector *q_sub = gsl_vector_alloc(q_vec->size);
-  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+  gsl_matrix *Vq_sub = gsl_matrix_safe_alloc(Vq_mat->size1, Vq_mat->size2);
+  gsl_vector *q_sub = gsl_vector_safe_alloc(q_vec->size);
+  gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
 
   igzstream infile(file_mstudy.c_str(), igzstream::in);
   if (!infile) {
@@ -4763,9 +4001,9 @@ void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
   gsl_vector_set_zero(s_vec);
   ni = 0;
 
-  gsl_matrix *S_sub = gsl_matrix_alloc(S_mat->size1, S_mat->size2);
-  gsl_matrix *Svar_sub = gsl_matrix_alloc(Svar_mat->size1, Svar_mat->size2);
-  gsl_vector *s = gsl_vector_alloc(s_vec->size + 1);
+  gsl_matrix *S_sub = gsl_matrix_safe_alloc(S_mat->size1, S_mat->size2);
+  gsl_matrix *Svar_sub = gsl_matrix_safe_alloc(Svar_mat->size1, Svar_mat->size2);
+  gsl_vector *s = gsl_vector_safe_alloc(s_vec->size + 1);
 
   igzstream infile(file_mref.c_str(), igzstream::in);
   if (!infile) {
diff --git a/src/io.h b/src/io.h
index d9253e3..dd1d5c0 100644
--- a/src/io.h
+++ b/src/io.h
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -32,8 +34,8 @@
 
 using namespace std;
 
-void ProgressBar(string str, double p, double total);
-void ProgressBar(string str, double p, double total, double ratio);
+void ProgressBar(string str, double p, double total, double ratio = -1.0);
+
 std::istream &safeGetline(std::istream &is, std::string &t);
 
 bool ReadFile_snps(const string file_snps, set<string> &setSnps);
@@ -64,7 +66,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
                    const double &r2_level, map<string, string> &mapRS2chr,
                    map<string, long int> &mapRS2bp,
                    map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
-                   size_t &ns_test, bool debug);
+                   size_t &ns_test);
 bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
                   const gsl_matrix *W, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
@@ -94,7 +96,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
 
 bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
-                   const bool calc_K, bool debug);
+                   const bool calc_K);
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                   const bool calc_K);
@@ -102,7 +104,7 @@ bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp,
                    vector<vector<unsigned char>> &Xt, gsl_matrix *K,
                    const bool calc_K, const size_t ni_test,
-                   const size_t ns_test, bool debug);
+                   const size_t ns_test);
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
                   gsl_matrix *K, const bool calc_K, const size_t ni_test,
@@ -176,16 +178,6 @@ void ReadFile_mstudy(const string &file_mstudy, gsl_matrix *Vq,
                      gsl_vector *q_vec, gsl_vector *s_vec, size_t &ni);
 void ReadFile_mref(const string &file_mref, gsl_matrix *S_mat,
                    gsl_matrix *Svar_mat, gsl_vector *s_vec, size_t &ni);
-
-// WJA added.
-bool bgenKin(const string &file_geno, vector<int> &indicator_snp,
-             const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
-bool ReadFile_bgen(const string &file_bgen, const set<string> &setSnps,
-                   const gsl_matrix *W, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp, vector<SNPINFO> &snpInfo,
-                   const double &maf_level, const double &miss_level,
-                   const double &hwe_level, const double &r2_level,
-                   size_t &ns_test);
 bool ReadFile_sample(const string &file_sample,
                      vector<vector<int>> &indicator_pheno,
                      vector<vector<double>> &pheno,
diff --git a/src/lapack.cpp b/src/lapack.cpp
index ee0a497..d15446b 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -128,6 +128,10 @@ void lapack_dgemm(char *TransA, char *TransB, double alpha, const gsl_matrix *A,
   gsl_matrix *C_t = gsl_matrix_alloc(C->size2, C->size1);
   gsl_matrix_transpose_memcpy(C_t, C);
 
+  check_int_mult_overflow(M,K1);
+  check_int_mult_overflow(N,K1);
+  check_int_mult_overflow(M,N);
+
   dgemm_(TransA, TransB, &M, &N, &K1, &alpha, A_t->data, &LDA, B_t->data, &LDB,
          &beta, C_t->data, &LDC);
 
@@ -302,27 +306,6 @@ double LULndet(const gsl_matrix *LU) {
   return gsl_linalg_LU_lndet((gsl_matrix *)LU);
 }
 
-/*
-double LULndet(gsl_matrix_float *LU) {
-  gsl_matrix *LU_double = gsl_matrix_alloc(LU->size1, LU->size2);
-  double d;
-
-  // Copy float matrix to double.
-  for (size_t i = 0; i < LU->size1; i++) {
-    for (size_t j = 0; j < LU->size2; j++) {
-      gsl_matrix_set(LU_double, i, j, gsl_matrix_float_get(LU, i, j));
-    }
-  }
-
-  // LU decomposition.
-  d = gsl_linalg_LU_lndet(LU_double);
-
-  // Free matrix
-  gsl_matrix_free(LU_double);
-  return d;
-}
-*/
-
 // LU solve.
 void LUSolve(const gsl_matrix *LU, const gsl_permutation *p,
              const gsl_vector *b, gsl_vector *x) {
diff --git a/src/ldr.cpp b/src/ldr.cpp
index 3554efa..f70eb85 100644
--- a/src/ldr.cpp
+++ b/src/ldr.cpp
@@ -29,7 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
 #include "gsl/gsl_eigen.h"
@@ -46,7 +46,7 @@
 #include "param.h"
 
 using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
 
 void LDR::CopyFromParam(PARAM &cPar) {
   a_mode = cPar.a_mode;
@@ -70,8 +70,10 @@ void LDR::CopyFromParam(PARAM &cPar) {
   return;
 }
 
+
 void LDR::CopyToParam(PARAM &cPar) { return; }
 
+/*
 // X is a p by n matrix.
 void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
              const gsl_vector *y_gsl) {
@@ -107,3 +109,4 @@ void LDR::VB(const vector<vector<unsigned char>> &Xt, const gsl_matrix *W_gsl,
 
   return;
 }
+*/
diff --git a/src/lm.cpp b/src/lm.cpp
index 0c2a2bb..9132e81 100644
--- a/src/lm.cpp
+++ b/src/lm.cpp
@@ -39,7 +39,7 @@
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_roots.h"
 
-#include "eigenlib.h"
+// #include "eigenlib.h"
 #include "gzstream.h"
 #include "lapack.h"
 #include "lm.h"
@@ -55,8 +55,6 @@ void LM::CopyFromParam(PARAM &cPar) {
   file_out = cPar.file_out;
   path_out = cPar.path_out;
   file_gene = cPar.file_gene;
-  // WJA added
-  file_oxford = cPar.file_oxford;
 
   time_opt = 0.0;
 
@@ -333,14 +331,14 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
   for (size_t t = 0; t < ng_total; t++) {
     getline(infile, line);
     if (t % d_pace == 0 || t == ng_total - 1) {
-      ProgressBar("Performing Analysis ", t, ng_total - 1);
+      ProgressBar("Performing Analysis", t, ng_total - 1);
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
 
     c_phen = 0;
     for (size_t i = 0; i < indicator_idv.size(); ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -381,232 +379,6 @@ void LM::AnalyzeGene(const gsl_matrix *W, const gsl_vector *x) {
   return;
 }
 
-// WJA added
-void LM::Analyzebgen(const gsl_matrix *W, const gsl_vector *y) {
-  debug_msg("entering");
-  string file_bgen = file_oxford + ".bgen";
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return;
-  }
-
-  clock_t time_start = clock();
-
-  string line;
-  char *ch_ptr;
-
-  double beta = 0, se = 0, p_wald = 0, p_lrt = 0, p_score = 0;
-  int n_miss, c_phen;
-  double geno, x_mean;
-
-  // Calculate some basic quantities.
-  double yPwy, xPwy, xPwx;
-  double df = (double)W->size1 - (double)W->size2 - 1.0;
-
-  gsl_vector *x = gsl_vector_alloc(W->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(W->size1);
-
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *Wty = gsl_vector_alloc(W->size2);
-  gsl_vector *Wtx = gsl_vector_alloc(W->size2);
-  gsl_permutation *pmt = gsl_permutation_alloc(W->size2);
-
-  gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
-  int sig;
-  LUDecomp(WtW, pmt, &sig);
-  LUInvert(WtW, pmt, WtWi);
-
-  gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
-  CalcvPv(WtWi, Wty, y, yPwy);
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-
-  infile.ignore(bgen_snp_block_offset);
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB;
-  double bgen_geno_prob_BB, bgen_geno_prob_non_miss;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-  string id;
-  string rs;
-  string chr;
-  std::cout << "Warning: WJA hard coded SNP missingness "
-            << "threshold of 10%" << std::endl;
-
-  // Start reading genotypes and analyze.
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
-    }
-
-    // Read SNP header.
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (indicator_snp[t] == 0) {
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    x_mean = 0.0;
-    c_phen = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(x_miss);
-    for (size_t i = 0; i < bgen_N; ++i) {
-      if (indicator_idv[i] == 0) {
-        continue;
-      }
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
-      // WJA
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
-        n_miss++;
-      } else {
-        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
-      }
-      c_phen++;
-    }
-
-    x_mean /= static_cast<double>(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
-        gsl_vector_set(x, i, x_mean);
-      }
-      geno = gsl_vector_get(x, i);
-    }
-
-    // Calculate statistics.
-    time_start = clock();
-
-    gsl_blas_dgemv(CblasTrans, 1.0, W, x, 0.0, Wtx);
-    CalcvPv(WtWi, Wty, Wtx, y, x, xPwy, xPwx);
-    LmCalcP(a_mode - 50, yPwy, xPwy, xPwx, df, W->size1, beta, se, p_wald,
-            p_lrt, p_score);
-
-    time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-    // Store summary data.
-    SUMSTAT SNPs = {beta, se, 0.0, 0.0, p_wald, p_lrt, p_score, -0.0};
-    sumStat.push_back(SNPs);
-  }
-  cout << endl;
-
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-
-  gsl_matrix_free(WtW);
-  gsl_matrix_free(WtWi);
-  gsl_vector_free(Wty);
-  gsl_vector_free(Wtx);
-  gsl_permutation_free(pmt);
-
-  infile.close();
-  infile.clear();
-
-  return;
-}
-
 void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
   debug_msg("entering");
   igzstream infile(file_geno.c_str(), igzstream::in);
@@ -649,22 +421,22 @@ void LM::AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y) {
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
     getline(infile, line);
     if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+      ProgressBar("Reading SNPs", t, ns_total - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     x_mean = 0.0;
     c_phen = 0;
     n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -775,7 +547,7 @@ void LM::AnalyzePlink(const gsl_matrix *W, const gsl_vector *y) {
 
   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+      ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
diff --git a/src/lm.h b/src/lm.h
index cb22d3b..030e6f9 100644
--- a/src/lm.h
+++ b/src/lm.h
@@ -67,9 +67,6 @@ public:
   void AnalyzeGene(const gsl_matrix *W, const gsl_vector *x);
   void AnalyzePlink(const gsl_matrix *W, const gsl_vector *y);
   void AnalyzeBimbam(const gsl_matrix *W, const gsl_vector *y);
-  // WJA added.
-  void Analyzebgen(const gsl_matrix *W, const gsl_vector *y);
-
   void WriteFiles();
 };
 
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 134fbf9..4198fab 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -13,7 +15,7 @@
     GNU General Public License for more details.
 
     You should have received a copy of the GNU General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include <fstream>
@@ -38,11 +40,14 @@
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
 
-#include "eigenlib.h"
+// #include "eigenlib.h"
+
 #include "gzstream.h"
 #include "io.h"
+#include "fastblas.h"
 #include "lapack.h"
 #include "lmm.h"
+#include "mathfunc.h"
 
 using namespace std;
 
@@ -56,9 +61,6 @@ void LMM::CopyFromParam(PARAM &cPar) {
   path_out = cPar.path_out;
   file_gene = cPar.file_gene;
 
-  // WJA added.
-  file_oxford = cPar.file_oxford;
-
   l_min = cPar.l_min;
   l_max = cPar.l_max;
   n_region = cPar.n_region;
@@ -107,12 +109,13 @@ void LMM::WriteFiles() {
   }
 
   auto common_header = [&] () {
-    if (a_mode != 2)
+    if (a_mode != 2) {
       outfile << "beta" << "\t";
+      outfile << "se" << "\t";
+    }
 
-    outfile << "se" << "\t";
-
-    outfile << "logl_H1" << "\t";  // we may make this an option
+    if (!is_legacy_mode())
+      outfile << "logl_H1" << "\t";  // we may make this an option
 
     switch(a_mode) {
     case 1:
@@ -139,12 +142,13 @@ void LMM::WriteFiles() {
   auto sumstats = [&] (SUMSTAT st) {
     outfile << scientific << setprecision(6);
 
-    if (a_mode != 2)
+    if (a_mode != 2) {
       outfile << st.beta << "\t";
+      outfile << st.se << "\t";
+    }
 
-    outfile << st.se << "\t";
-
-    outfile << st.logl_H1 << "\t";
+    if (!is_legacy_mode())
+      outfile << st.logl_H1 << "\t";
 
     switch(a_mode) {
     case 1:
@@ -229,6 +233,7 @@ void CalcPab(const size_t n_cvt, const size_t e_mode, const gsl_vector *Hi_eval,
               gsl_matrix_const_column(Uab, index_ab);
           gsl_blas_ddot(Hi_eval, &Uab_col.vector, &p_ab);
           if (e_mode != 0) {
+            assert(false);
             p_ab = gsl_vector_get(ab, index_ab) - p_ab;
           }
           gsl_matrix_set(Pab, 0, index_ab, p_ab);
@@ -364,16 +369,16 @@ double LogL_f(double l, void *params) {
   double f = 0.0, logdet_h = 0.0, d;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
@@ -390,11 +395,13 @@ double LogL_f(double l, void *params) {
 
   index_yy = GetabIndex(n_cvt + 2, n_cvt + 2, n_cvt);
   double P_yy = gsl_matrix_get(Pab, nc_total, index_yy);
-  f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * log(P_yy);
 
-  gsl_matrix_free(Pab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(v_temp);
+  assert(!is_nan(P_yy));
+  f = c - 0.5 * logdet_h - 0.5 * (double)ni_test * log(P_yy);
+  assert(!is_nan(f));
+  gsl_matrix_safe_free(Pab); // FIXME
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(v_temp);
   return f;
 }
 
@@ -414,23 +421,23 @@ double LogL_dev1(double l, void *params) {
   double dev1 = 0.0, trace_Hi = 0.0;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
-  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
   gsl_vector_mul(HiHi_eval, Hi_eval);
 
   gsl_vector_set_all(v_temp, 1.0);
@@ -452,11 +459,11 @@ double LogL_dev1(double l, void *params) {
   double yPKPy = (P_yy - PP_yy) / l;
   dev1 = -0.5 * trace_HiK + 0.5 * (double)ni_test * yPKPy / P_yy;
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(PPab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(HiHi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);   // FIXME: may contain NaN
+  gsl_matrix_safe_free(PPab);  // FIXME: may contain NaN
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(HiHi_eval);
+  gsl_vector_safe_free(v_temp);
 
   return dev1;
 }
@@ -477,27 +484,27 @@ double LogL_dev2(double l, void *params) {
   double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
-  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
   gsl_vector_mul(HiHi_eval, Hi_eval);
-  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
   gsl_vector_mul(HiHiHi_eval, Hi_eval);
 
   gsl_vector_set_all(v_temp, 1.0);
@@ -527,13 +534,13 @@ double LogL_dev2(double l, void *params) {
          0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
              (P_yy * P_yy);
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(PPab);
-  gsl_matrix_free(PPPab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(HiHi_eval);
-  gsl_vector_free(HiHiHi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);  // FIXME
+  gsl_matrix_safe_free(PPab);
+  gsl_matrix_safe_free(PPPab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(HiHi_eval);
+  gsl_vector_safe_free(HiHiHi_eval);
+  gsl_vector_safe_free(v_temp);
 
   return dev2;
 }
@@ -554,27 +561,27 @@ void LogL_dev12(double l, void *params, double *dev1, double *dev2) {
   double trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
-  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
   gsl_vector_mul(HiHi_eval, Hi_eval);
-  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
   gsl_vector_mul(HiHiHi_eval, Hi_eval);
 
   gsl_vector_set_all(v_temp, 1.0);
@@ -607,13 +614,13 @@ void LogL_dev12(double l, void *params, double *dev1, double *dev2) {
           0.5 * (double)ni_test * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) /
               (P_yy * P_yy);
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(PPab);
-  gsl_matrix_free(PPPab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(HiHi_eval);
-  gsl_vector_free(HiHiHi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);   // FIXME: may contain NaN
+  gsl_matrix_safe_free(PPab);  // FIXME: may contain NaN
+  gsl_matrix_safe_free(PPPab); // FIXME: may contain NaN
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(HiHi_eval);
+  gsl_vector_safe_free(HiHiHi_eval);
+  gsl_vector_safe_free(v_temp);
 
   return;
 }
@@ -637,17 +644,17 @@ double LogRL_f(double l, void *params) {
   double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *Iab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
@@ -676,10 +683,10 @@ double LogRL_f(double l, void *params) {
   double c = 0.5 * df * (log(df) - log(2 * M_PI) - 1.0);
   f = c - 0.5 * logdet_h - 0.5 * logdet_hiw - 0.5 * df * log(P_yy);
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(Iab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);
+  gsl_matrix_safe_free(Iab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(v_temp);
   return f;
 }
 
@@ -702,23 +709,23 @@ double LogRL_dev1(double l, void *params) {
   double dev1 = 0.0, trace_Hi = 0.0;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
-  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
   gsl_vector_mul(HiHi_eval, Hi_eval);
 
   gsl_vector_set_all(v_temp, 1.0);
@@ -750,11 +757,11 @@ double LogRL_dev1(double l, void *params) {
 
   dev1 = -0.5 * trace_PK + 0.5 * df * yPKPy / P_yy;
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(PPab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(HiHi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);  // FIXME: may contain NaN
+  gsl_matrix_safe_free(PPab); // FIXME: may contain NaN
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(HiHi_eval);
+  gsl_vector_safe_free(v_temp);
 
   return dev1;
 }
@@ -778,27 +785,27 @@ double LogRL_dev2(double l, void *params) {
   double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
-  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
   gsl_vector_mul(HiHi_eval, Hi_eval);
-  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
   gsl_vector_mul(HiHiHi_eval, Hi_eval);
 
   gsl_vector_set_all(v_temp, 1.0);
@@ -838,13 +845,13 @@ double LogRL_dev2(double l, void *params) {
   dev2 = 0.5 * trace_PKPK -
          0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(PPab);
-  gsl_matrix_free(PPPab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(HiHi_eval);
-  gsl_vector_free(HiHiHi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);  // FIXME
+  gsl_matrix_safe_free(PPab);
+  gsl_matrix_safe_free(PPPab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(HiHi_eval);
+  gsl_vector_safe_free(HiHiHi_eval);
+  gsl_vector_safe_free(v_temp);
 
   return dev2;
 }
@@ -868,27 +875,27 @@ void LogRL_dev12(double l, void *params, double *dev1, double *dev2) {
   double trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
-  gsl_vector_memcpy(v_temp, p->eval);
+  gsl_vector_safe_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
   if (p->e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
-  gsl_vector_memcpy(HiHi_eval, Hi_eval);
+  gsl_vector_safe_memcpy(HiHi_eval, Hi_eval);
   gsl_vector_mul(HiHi_eval, Hi_eval);
-  gsl_vector_memcpy(HiHiHi_eval, HiHi_eval);
+  gsl_vector_safe_memcpy(HiHiHi_eval, HiHi_eval);
   gsl_vector_mul(HiHiHi_eval, Hi_eval);
 
   gsl_vector_set_all(v_temp, 1.0);
@@ -930,13 +937,13 @@ void LogRL_dev12(double l, void *params, double *dev1, double *dev2) {
   *dev2 = 0.5 * trace_PKPK -
           0.5 * df * (2.0 * yPKPKPy * P_yy - yPKPy * yPKPy) / (P_yy * P_yy);
 
-  gsl_matrix_free(Pab);
-  gsl_matrix_free(PPab);
-  gsl_matrix_free(PPPab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(HiHi_eval);
-  gsl_vector_free(HiHiHi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);  // FIXME
+  gsl_matrix_safe_free(PPab);
+  gsl_matrix_safe_free(PPPab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(HiHi_eval);
+  gsl_vector_safe_free(HiHiHi_eval);
+  gsl_vector_safe_free(v_temp);
 
   return;
 }
@@ -948,16 +955,16 @@ void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta,
 
   int df = (int)ni_test - (int)n_cvt - 1;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
-  gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
 
-  gsl_vector_memcpy(v_temp, params.eval);
+  gsl_vector_safe_memcpy(v_temp, params.eval);
   gsl_vector_scale(v_temp, l);
   if (params.e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
@@ -977,9 +984,9 @@ void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta,
   se = sqrt(1.0 / (tau * P_xx));
   p_wald = gsl_cdf_fdist_Q((P_yy - Px_yy) * tau, 1.0, df);
 
-  gsl_matrix_free(Pab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(v_temp);
   return;
 }
 
@@ -990,16 +997,16 @@ void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
 
   int df = (int)ni_test - (int)n_cvt - 1;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
-  gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
 
-  gsl_vector_memcpy(v_temp, params.eval);
+  gsl_vector_safe_memcpy(v_temp, params.eval);
   gsl_vector_scale(v_temp, l);
   if (params.e_mode == 0) {
     gsl_vector_set_all(Hi_eval, 1.0);
   } else {
-    gsl_vector_memcpy(Hi_eval, v_temp);
+    gsl_vector_safe_memcpy(Hi_eval, v_temp);
   }
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
@@ -1021,9 +1028,9 @@ void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
   p_score =
       gsl_cdf_fdist_Q((double)ni_test * P_xy * P_xy / (P_yy * P_xx), 1.0, df);
 
-  gsl_matrix_free(Pab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(v_temp);
+  gsl_matrix_safe_free(Pab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(v_temp);
   return;
 }
 
@@ -1031,7 +1038,7 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
   size_t index_ab;
   size_t n_cvt = UtW->size2;
 
-  gsl_vector *u_a = gsl_vector_alloc(Uty->size);
+  gsl_vector *u_a = gsl_vector_safe_alloc(Uty->size);
 
   for (size_t a = 1; a <= n_cvt + 2; ++a) {
     if (a == n_cvt + 1) {
@@ -1039,10 +1046,10 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
     }
 
     if (a == n_cvt + 2) {
-      gsl_vector_memcpy(u_a, Uty);
+      gsl_vector_safe_memcpy(u_a, Uty);
     } else {
       gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, a - 1);
-      gsl_vector_memcpy(u_a, &UtW_col.vector);
+      gsl_vector_safe_memcpy(u_a, &UtW_col.vector);
     }
 
     for (size_t b = a; b >= 1; --b) {
@@ -1054,17 +1061,17 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
       gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
 
       if (b == n_cvt + 2) {
-        gsl_vector_memcpy(&Uab_col.vector, Uty);
+        gsl_vector_safe_memcpy(&Uab_col.vector, Uty);
       } else {
         gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
-        gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector);
+        gsl_vector_safe_memcpy(&Uab_col.vector, &UtW_col.vector);
       }
 
       gsl_vector_mul(&Uab_col.vector, u_a);
     }
   }
 
-  gsl_vector_free(u_a);
+  gsl_vector_safe_free(u_a);
   return;
 }
 
@@ -1078,12 +1085,12 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty,
     gsl_vector_view Uab_col = gsl_matrix_column(Uab, index_ab);
 
     if (b == n_cvt + 2) {
-      gsl_vector_memcpy(&Uab_col.vector, Uty);
+      gsl_vector_safe_memcpy(&Uab_col.vector, Uty);
     } else if (b == n_cvt + 1) {
-      gsl_vector_memcpy(&Uab_col.vector, Utx);
+      gsl_vector_safe_memcpy(&Uab_col.vector, Utx);
     } else {
       gsl_vector_const_view UtW_col = gsl_matrix_const_column(UtW, b - 1);
-      gsl_vector_memcpy(&Uab_col.vector, &UtW_col.vector);
+      gsl_vector_safe_memcpy(&Uab_col.vector, &UtW_col.vector);
     }
 
     gsl_vector_mul(&Uab_col.vector, Utx);
@@ -1097,8 +1104,8 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
   size_t n_cvt = W->size2;
 
   double d;
-  gsl_vector *v_a = gsl_vector_alloc(y->size);
-  gsl_vector *v_b = gsl_vector_alloc(y->size);
+  gsl_vector *v_a = gsl_vector_safe_alloc(y->size);
+  gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
 
   for (size_t a = 1; a <= n_cvt + 2; ++a) {
     if (a == n_cvt + 1) {
@@ -1106,10 +1113,10 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
     }
 
     if (a == n_cvt + 2) {
-      gsl_vector_memcpy(v_a, y);
+      gsl_vector_safe_memcpy(v_a, y);
     } else {
       gsl_vector_const_view W_col = gsl_matrix_const_column(W, a - 1);
-      gsl_vector_memcpy(v_a, &W_col.vector);
+      gsl_vector_safe_memcpy(v_a, &W_col.vector);
     }
 
     for (size_t b = a; b >= 1; --b) {
@@ -1120,10 +1127,10 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
       index_ab = GetabIndex(a, b, n_cvt);
 
       if (b == n_cvt + 2) {
-        gsl_vector_memcpy(v_b, y);
+        gsl_vector_safe_memcpy(v_b, y);
       } else {
         gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
-        gsl_vector_memcpy(v_b, &W_col.vector);
+        gsl_vector_safe_memcpy(v_b, &W_col.vector);
       }
 
       gsl_blas_ddot(v_a, v_b, &d);
@@ -1131,8 +1138,8 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
     }
   }
 
-  gsl_vector_free(v_a);
-  gsl_vector_free(v_b);
+  gsl_vector_safe_free(v_a);
+  gsl_vector_safe_free(v_b);
   return;
 }
 
@@ -1142,31 +1149,32 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
   size_t n_cvt = W->size2;
 
   double d;
-  gsl_vector *v_b = gsl_vector_alloc(y->size);
+  gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
 
   for (size_t b = 1; b <= n_cvt + 2; ++b) {
     index_ab = GetabIndex(n_cvt + 1, b, n_cvt);
 
     if (b == n_cvt + 2) {
-      gsl_vector_memcpy(v_b, y);
+      gsl_vector_safe_memcpy(v_b, y);
     } else if (b == n_cvt + 1) {
-      gsl_vector_memcpy(v_b, x);
+      gsl_vector_safe_memcpy(v_b, x);
     } else {
       gsl_vector_const_view W_col = gsl_matrix_const_column(W, b - 1);
-      gsl_vector_memcpy(v_b, &W_col.vector);
+      gsl_vector_safe_memcpy(v_b, &W_col.vector);
     }
 
     gsl_blas_ddot(x, v_b, &d);
     gsl_vector_set(ab, index_ab, d);
   }
 
-  gsl_vector_free(v_b);
+  gsl_vector_safe_free(v_b);
   return;
 }
 
 void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
                       const gsl_matrix *UtW, const gsl_vector *Utx,
                       const gsl_matrix *W, const gsl_vector *x) {
+  debug_msg(file_gene);
   igzstream infile(file_gene.c_str(), igzstream::in);
   if (!infile) {
     cout << "error reading gene expression file:" << file_gene << endl;
@@ -1188,25 +1196,25 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_vector *y = gsl_vector_alloc(U->size1);
-  gsl_vector *Uty = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_vector *y = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *Uty = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   // Header.
   getline(infile, line);
 
   for (size_t t = 0; t < ng_total; t++) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == ng_total - 1) {
-      ProgressBar("Performing Analysis ", t, ng_total - 1);
+      ProgressBar("Performing Analysis", t, ng_total - 1);
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
 
     c_phen = 0;
     for (size_t i = 0; i < indicator_idv.size(); ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -1260,10 +1268,10 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
   }
   cout << endl;
 
-  gsl_vector_free(y);
-  gsl_vector_free(Uty);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_vector_safe_free(y);
+  gsl_vector_safe_free(Uty);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
 
   infile.close();
   infile.clear();
@@ -1271,35 +1279,37 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
   return;
 }
 
-void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
-                        const gsl_matrix *UtW, const gsl_vector *Uty,
-                        const gsl_matrix *W, const gsl_vector *y,
-                        const set<string> gwasnps) {
-  debug_msg("entering");
+
+void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
+                  const gsl_matrix *U, const gsl_vector *eval,
+                  const gsl_matrix *UtW, const gsl_vector *Uty,
+                  const gsl_matrix *W, const gsl_vector *y,
+                  const set<string> gwasnps) {
   clock_t time_start = clock();
 
-  // LOCO support
+  // Subset/LOCO support
   bool process_gwasnps = gwasnps.size();
   if (process_gwasnps)
-    debug_msg("AnalyzeBimbam w. LOCO");
+    debug_msg("Analyze subset of SNPs (LOCO)");
 
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
   const size_t inds = U->size1;
-  gsl_vector *x = gsl_vector_alloc(inds); // #inds
-  gsl_vector *x_miss = gsl_vector_alloc(inds);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  enforce(inds == ni_test);
+  gsl_vector *x = gsl_vector_safe_alloc(inds); // #inds
+  gsl_vector *x_miss = gsl_vector_safe_alloc(inds);
+  gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   // Create a large matrix with LMM_BATCH_SIZE columns for batched processing
   // const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE);
   const size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(inds, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(inds, msize);
-
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(inds, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_safe_alloc(inds, msize);
   enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure
+  enforce(Xlarge->size1 == inds);
   gsl_matrix_set_zero(Xlarge);
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -1307,9 +1317,6 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
   // start reading genotypes and analyze
   size_t c = 0;
 
-  igzstream infile(file_geno.c_str(), igzstream::in);
-  enforce_msg(infile, "error reading genotype file");
-
   auto batch_compute = [&](size_t l) { // using a C++ closure
     // Compute SNPs in batch, note the computations are independent per SNP
     gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
@@ -1317,7 +1324,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
         gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
 
     time_start = clock();
-    eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+    fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
                    &UtXlarge_sub.matrix);
     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
@@ -1325,15 +1332,15 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
     for (size_t i = 0; i < l; i++) {
       // for every batch...
       gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-      gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+      gsl_vector_safe_memcpy(Utx, &UtXlarge_col.vector);
 
       CalcUab(UtW, Uty, Utx, Uab);
 
       time_start = clock();
       FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
 
-      double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-      double p_lrt = 0, p_score = 0;
+      double lambda_mle = 0.0, lambda_remle = 0.0, beta = 0.0, se = 0.0, p_wald = 0.0;
+      double p_lrt = 0.0, p_score = 0.0;
       double logl_H1 = 0.0;
 
       // 3 is before 1.
@@ -1361,183 +1368,200 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
     }
   };
 
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    // for every SNP
-    string line;
-    safeGetline(infile, line);
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+  const auto num_snps = indicator_snp.size();
+  enforce_msg(num_snps > 0,"Zero SNPs to process - data corrupt?");
+  if (num_snps < 50) {
+    cerr << num_snps << " SNPs" << endl;
+    warning_msg("very few SNPs processed");
+  }
+  const size_t progress_step = (num_snps/50>d_pace ? num_snps/50 : d_pace);
+
+  for (size_t t = 0; t < num_snps; ++t) {
+    if (t % progress_step == 0 || t == (num_snps - 1)) {
+      ProgressBar("Reading SNPs", t, num_snps - 1);
     }
     if (indicator_snp[t] == 0)
       continue;
 
-    char *ch_ptr = strtok((char *)line.c_str(), " , \t");
-    auto snp = string(ch_ptr);
+    auto tup = fetch_snp(t);
+    auto snp = get<0>(tup);
+    auto gs = get<1>(tup);
+
     // check whether SNP is included in gwasnps (used by LOCO)
     if (process_gwasnps && gwasnps.count(snp) == 0)
       continue;
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
 
-    double x_mean = 0.0;
-    int c_phen = 0;
-    int n_miss = 0;
+    // drop missing idv and plug mean values for missing geno
+    double x_total = 0.0; // sum genotype values to compute x_mean
+    uint pos = 0;         // position in target vector
+    uint n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
       // get the genotypes per individual and compute stats per SNP
-      ch_ptr = strtok(NULL, " , \t");
-      if (indicator_idv[i] == 0)
+      if (indicator_idv[i] == 0) // skip individual
         continue;
 
-      if (strcmp(ch_ptr, "NA") == 0) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
+      double geno = gs[i];
+      if (std::isnan(geno)) {
+        gsl_vector_set(x_miss, pos, 1.0);
         n_miss++;
       } else {
-        double geno = atof(ch_ptr);
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
+        gsl_vector_set(x, pos, geno);
+        x_total += geno;
       }
-      c_phen++;
+      pos++;
     }
+    enforce(pos == ni_test);
 
-    x_mean /= (double)(ni_test - n_miss);
+    const double x_mean = x_total/(double)(ni_test - n_miss);
 
+    // plug x_mean back into missing values
     for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
+      if (gsl_vector_get(x_miss, i) == 1.0) {
         gsl_vector_set(x, i, x_mean);
       }
     }
+
+    /* this is what below GxE does
+    for (size_t i = 0; i < ni_test; ++i) {
+      auto geno = gsl_vector_get(x, i);
+      if (std::isnan(geno)) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+      if (x_mean > 1.0) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+    */
+    enforce(x->size == ni_test);
+
     // copy genotype values for SNP into Xlarge cache
     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
+    gsl_vector_safe_memcpy(&Xlarge_col.vector, x);
     c++; // count SNPs going in
 
     if (c % msize == 0)
       batch_compute(msize);
   }
   batch_compute(c % msize);
+  ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1);
   // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
   cout << endl;
 
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_vector_safe_free(x);
+  gsl_vector_safe_free(x_miss);
+  gsl_vector_safe_free(Utx);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
 
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
+  gsl_matrix_safe_free(Xlarge);
+  gsl_matrix_safe_free(UtXlarge);
 
-  infile.close();
-  infile.clear();
-
-  return;
 }
 
-void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
-                       const gsl_matrix *UtW, const gsl_vector *Uty,
-                       const gsl_matrix *W, const gsl_vector *y) {
-  debug_msg("entering");
-  string file_bed = file_bfile + ".bed";
-  ifstream infile(file_bed.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bed file:" << file_bed << endl;
-    return;
-  }
+void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_vector *Uty,
+                        const gsl_matrix *W, const gsl_vector *y,
+                        const set<string> gwasnps) {
+  debug_msg(file_geno);
 
-  clock_t time_start = clock();
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  enforce_msg(infile, "error reading genotype file");
+  size_t prev_line = 0;
 
-  char ch[1];
-  bitset<8> b;
+  std::vector <double> gs;
+  gs.resize(ni_total);
 
-  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-  double p_lrt = 0, p_score = 0;
-  double logl_H1 = 0.0;
-  int n_bit, n_miss, ci_total, ci_test;
-  double geno, x_mean;
+  // fetch_snp is a callback function for every SNP row
+  std::function<SnpNameValues(size_t)>  fetch_snp = [&](size_t num) {
+    string line;
+    while (prev_line <= num) {
+      // also read SNPs that were skipped
+      safeGetline(infile, line);
+      prev_line++;
+    }
+    char *ch_ptr = strtok((char *)line.c_str(), " , \t");
+    enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL
 
-  // Calculate basic quantities.
-  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+    auto snp = string(ch_ptr);
+    ch_ptr = strtok_safe(NULL, " , \t"); // skip column
+    ch_ptr = strtok_safe(NULL, " , \t"); // skip column
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+    gs.assign (ni_total,nan("")); // wipe values
 
-  // Create a large matrix.
-  size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      enforce_msg(ch_ptr,line.c_str());
+      if (strcmp(ch_ptr, "NA") != 0)
+        gs[i] = atof(ch_ptr);
+    }
+    return std::make_tuple(snp,gs);
+  };
 
-  gsl_matrix_set_zero(Uab);
-  CalcUab(UtW, Uty, Uab);
+  LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
+
+  infile.close();
+  infile.clear();
+}
+
+void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                       const gsl_matrix *UtW, const gsl_vector *Uty,
+                       const gsl_matrix *W, const gsl_vector *y,
+                       const set<string> gwasnps) {
+  string file_bed = file_bfile + ".bed";
+  debug_msg(file_bed);
 
+  ifstream infile(file_bed.c_str(), ios::binary);
+  enforce_msg(infile,"error reading genotype (.bed) file");
+
+  char ch[1]; // 1 byte buffer
   // Calculate n_bit and c, the number of bit for each SNP.
-  if (ni_total % 4 == 0) {
-    n_bit = ni_total / 4;
-  } else {
-    n_bit = ni_total / 4 + 1;
-  }
+  const size_t n_bit = (ni_total % 4 == 0 ? ni_total / 4 : ni_total / 4 + 1);
 
-  // Print the first three magic numbers.
+  // first three magic numbers.
   for (int i = 0; i < 3; ++i) {
     infile.read(ch, 1);
-    b = ch[0];
+    // const bitset<8> b = ch[0];  b is never used
   }
 
-  size_t c = 0, t_last = 0;
-  for (size_t t = 0; t < snpInfo.size(); ++t) {
-    if (indicator_snp[t] == 0)
-      continue;
-    t_last++;
-  }
-  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
-    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
-    }
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
+  std::vector <double> gs;
+  gs.resize(ni_total);
 
+  // fetch_snp is a callback function for every SNP row
+  std::function<SnpNameValues(size_t)>  fetch_snp = [&](size_t num) {
+    gs.assign (ni_total,nan("")); // wipe values
     // n_bit, and 3 is the number of magic numbers.
+    auto t = num;
     infile.seekg(t * n_bit + 3);
-
-    // Read genotypes.
-    x_mean = 0.0;
-    n_miss = 0;
-    ci_total = 0;
-    ci_test = 0;
-    for (int i = 0; i < n_bit; ++i) {
+    auto ci_total = 0;
+    auto ci_test = 0;
+    // ---- for all genotypes
+    for (uint i = 0; i < n_bit; ++i) {
       infile.read(ch, 1);
-      b = ch[0];
+      bitset<8> bset8 = ch[0];
 
       // Minor allele homozygous: 2.0; major: 0.0.
       for (size_t j = 0; j < 4; ++j) {
         if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
           break;
         }
-        if (indicator_idv[ci_total] == 0) {
+        if (indicator_idv[ci_total] == 0) { // skip individual
           ci_total++;
           continue;
         }
 
-        if (b[2 * j] == 0) {
-          if (b[2 * j + 1] == 0) {
-            gsl_vector_set(x, ci_test, 2);
-            x_mean += 2.0;
+        if (bset8[2 * j] == 0) {
+          if (bset8[2 * j + 1] == 0) {
+            gs[ci_test] = 2.0;
           } else {
-            gsl_vector_set(x, ci_test, 1);
-            x_mean += 1.0;
+            gs[ci_test] = 1.0;
           }
         } else {
-          if (b[2 * j + 1] == 1) {
-            gsl_vector_set(x, ci_test, 0);
+          if (bset8[2 * j + 1] == 1) {
+            gs[ci_test] = 0.0;
           } else {
-            gsl_vector_set(x, ci_test, -9);
-            n_miss++;
+            gs[ci_test] = nan(""); // already set to NaN - originally was -9.0
           }
         }
 
@@ -1545,367 +1569,14 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
         ci_test++;
       }
     }
+    string snp="unknown";
+    return std::make_tuple(snp,gs);
+  };
 
-    x_mean /= (double)(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      geno = gsl_vector_get(x, i);
-      if (geno == -9) {
-        gsl_vector_set(x, i, x_mean);
-        geno = x_mean;
-      }
-    }
-
-    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
-    c++;
-
-    if (c % msize == 0 || c == t_last) {
-      size_t l = 0;
-      if (c % msize == 0) {
-        l = msize;
-      } else {
-        l = c % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
-
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
-        CalcUab(UtW, Uty, Utx, Uab);
-
-        time_start = clock();
-        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-        // 3 is before 1, for beta.
-        if (a_mode == 3 || a_mode == 4) {
-          CalcRLScore(l_mle_null, param1, beta, se, p_score);
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
-                     logl_H1);
-          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score, logl_H1};
-        sumStat.push_back(SNPs);
-      }
-    }
-  }
-  cout << endl;
-
-  gsl_vector_free(x);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
-
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
-
-  infile.close();
-  infile.clear();
-
-  return;
-}
-
-// WJA added.
-void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
-                      const gsl_matrix *UtW, const gsl_vector *Uty,
-                      const gsl_matrix *W, const gsl_vector *y) {
-  debug_msg("entering");
-  string file_bgen = file_oxford + ".bgen";
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return;
-  }
-
-  clock_t time_start = clock();
-  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-  double p_lrt = 0, p_score = 0;
-  double logl_H1 = 0.0;
-  int n_miss, c_phen;
-  double geno, x_mean;
-
-  // Calculate basic quantities.
-  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
-
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
-
-  // Create a large matrix.
-  size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
-
-  gsl_matrix_set_zero(Uab);
-  CalcUab(UtW, Uty, Uab);
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-
-  infile.ignore(bgen_snp_block_offset);
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-  double bgen_geno_prob_non_miss;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-  string id;
-  string rs;
-  string chr;
-  std::cout << "Warning: WJA hard coded SNP missingness "
-            << "threshold of 10%" << std::endl;
-
-  // Start reading genotypes and analyze.
-  size_t c = 0, t_last = 0;
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-    t_last++;
-  }
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
-    }
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-
-    // Read SNP header.
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (indicator_snp[t] == 0) {
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    x_mean = 0.0;
-    c_phen = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(x_miss);
-    for (size_t i = 0; i < bgen_N; ++i) {
-      if (indicator_idv[i] == 0) {
-        continue;
-      }
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
-      // WJA.
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
-        n_miss++;
-      } else {
-
-        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
-      }
-      c_phen++;
-    }
-
-    x_mean /= static_cast<double>(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
-        gsl_vector_set(x, i, x_mean);
-      }
-      geno = gsl_vector_get(x, i);
-    }
-
-    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
-    c++;
-
-    if (c % msize == 0 || c == t_last) {
-      size_t l = 0;
-      if (c % msize == 0) {
-        l = msize;
-      } else {
-        l = c % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
-
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
-        CalcUab(UtW, Uty, Utx, Uab);
-
-        time_start = clock();
-        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-        // 3 is before 1.
-        if (a_mode == 3 || a_mode == 4) {
-          CalcRLScore(l_mle_null, param1, beta, se, p_score);
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
-                     logl_H1);
-          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score, logl_H1};
-        sumStat.push_back(SNPs);
-      }
-    }
-  }
-  cout << endl;
-
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
-
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
+  LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
 
   infile.close();
   infile.clear();
-
-  return;
 }
 
 void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
@@ -1914,10 +1585,10 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
                   vector<pair<size_t, double>> &pos_loglr) {
   double logl_H0, logl_H1, log_lr, lambda0, lambda1;
 
-  gsl_vector *w = gsl_vector_alloc(Uty->size);
-  gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1);
-  gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6);
-  gsl_vector *ab = gsl_vector_alloc(6);
+  gsl_vector *w = gsl_vector_safe_alloc(Uty->size);
+  gsl_matrix *Utw = gsl_matrix_safe_alloc(Uty->size, 1);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(Uty->size, 6);
+  gsl_vector *ab = gsl_vector_safe_alloc(6);
 
   gsl_vector_set_zero(ab);
   gsl_vector_set_all(w, 1.0);
@@ -1940,10 +1611,10 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
     pos_loglr.push_back(make_pair(i, log_lr));
   }
 
-  gsl_vector_free(w);
-  gsl_matrix_free(Utw);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_vector_safe_free(w);
+  gsl_matrix_safe_free(Utw);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
 
   return;
 }
@@ -2122,8 +1793,8 @@ void CalcLambda(const char func_name, const gsl_vector *eval,
   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -2132,8 +1803,8 @@ void CalcLambda(const char func_name, const gsl_vector *eval,
 
   CalcLambda(func_name, param0, l_min, l_max, n_region, lambda, logl_H0);
 
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
 
   return;
 }
@@ -2145,8 +1816,8 @@ void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -2158,8 +1829,8 @@ void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
   pve = trace_G * lambda / (trace_G * lambda + 1.0);
   pve_se = trace_G / ((trace_G * lambda + 1.0) * (trace_G * lambda + 1.0)) * se;
 
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
   return;
 }
 
@@ -2172,27 +1843,27 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc(eval->size);
-  gsl_vector *v_temp = gsl_vector_alloc(eval->size);
-  gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2);
-  gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2);
-  gsl_vector *WHiy = gsl_vector_alloc(UtW->size2);
-  gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc(eval->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc(eval->size);
+  gsl_matrix *HiW = gsl_matrix_safe_alloc(eval->size, UtW->size2);
+  gsl_matrix *WHiW = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
+  gsl_vector *WHiy = gsl_vector_safe_alloc(UtW->size2);
+  gsl_matrix *Vbeta = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
 
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
 
-  gsl_vector_memcpy(v_temp, eval);
+  gsl_vector_safe_memcpy(v_temp, eval);
   gsl_vector_scale(v_temp, lambda);
   gsl_vector_set_all(Hi_eval, 1.0);
   gsl_vector_add_constant(v_temp, 1.0);
   gsl_vector_div(Hi_eval, v_temp);
 
   // Calculate beta.
-  gsl_matrix_memcpy(HiW, UtW);
+  gsl_matrix_safe_memcpy(HiW, UtW);
   for (size_t i = 0; i < UtW->size2; i++) {
     gsl_vector_view HiW_col = gsl_matrix_column(HiW, i);
     gsl_vector_mul(&HiW_col.vector, Hi_eval);
@@ -2223,15 +1894,15 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
     gsl_vector_set(se_beta, i, sqrt(gsl_matrix_get(Vbeta, i, i)));
   }
 
-  gsl_matrix_free(Uab);
-  gsl_matrix_free(Pab);
-  gsl_vector_free(ab);
-  gsl_vector_free(Hi_eval);
-  gsl_vector_free(v_temp);
-  gsl_matrix_free(HiW);
-  gsl_matrix_free(WHiW);
-  gsl_vector_free(WHiy);
-  gsl_matrix_free(Vbeta);
+  gsl_matrix_safe_free(Uab);
+  gsl_matrix_safe_free(Pab);
+  gsl_vector_safe_free(ab);
+  gsl_vector_safe_free(Hi_eval);
+  gsl_vector_safe_free(v_temp);
+  gsl_matrix_safe_free(HiW);
+  gsl_matrix_safe_free(WHiW);
+  gsl_vector_safe_free(WHiy);
+  gsl_matrix_safe_free(Vbeta);
 
   gsl_permutation_free(pmt);
   return;
@@ -2262,40 +1933,40 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_vector *x = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *x_miss = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
-  gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+  gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
   gsl_matrix_view UtW_expand_mat =
       gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
-  gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
+  gsl_matrix_safe_memcpy(&UtW_expand_mat.matrix, UtW);
   gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
   gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
   gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
 
   // Start reading genotypes and analyze.
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+      ProgressBar("Reading SNPs", t, ns_total - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     x_mean = 0.0;
     c_phen = 0;
     n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -2372,13 +2043,13 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
   }
   cout << endl;
 
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_vector_safe_free(x);
+  gsl_vector_safe_free(x_miss);
+  gsl_vector_safe_free(Utx);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
 
-  gsl_matrix_free(UtW_expand);
+  gsl_matrix_safe_free(UtW_expand);
 
   infile.close();
   infile.clear();
@@ -2390,8 +2061,8 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
                           const gsl_matrix *UtW, const gsl_vector *Uty,
                           const gsl_matrix *W, const gsl_vector *y,
                           const gsl_vector *env) {
-  debug_msg("entering");
   string file_bed = file_bfile + ".bed";
+  debug_msg(file_bed);
   ifstream infile(file_bed.c_str(), ios::binary);
   if (!infile) {
     cout << "error reading bed file:" << file_bed << endl;
@@ -2412,15 +2083,15 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_vector *x = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
-  gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+  gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
   gsl_matrix_view UtW_expand_mat =
       gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
-  gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
+  gsl_matrix_safe_memcpy(&UtW_expand_mat.matrix, UtW);
   gsl_vector_view UtW_expand_env = gsl_matrix_column(UtW_expand, UtW->size2);
   gsl_blas_dgemv(CblasTrans, 1.0, U, env, 0.0, &UtW_expand_env.vector);
   gsl_vector_view UtW_expand_x = gsl_matrix_column(UtW_expand, UtW->size2 + 1);
@@ -2440,7 +2111,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
 
   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+      ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
@@ -2550,12 +2221,12 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
   }
   cout << endl;
 
-  gsl_vector_free(x);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
+  gsl_vector_safe_free(x);
+  gsl_vector_safe_free(Utx);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_safe_free(ab);
 
-  gsl_matrix_free(UtW_expand);
+  gsl_matrix_safe_free(UtW_expand);
 
   infile.close();
   infile.clear();
diff --git a/src/lmm.h b/src/lmm.h
index 4d57ab1..e9740d5 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -23,10 +25,12 @@
 #include "gsl/gsl_vector.h"
 #include "io.h"
 #include "param.h"
+#include <functional>
+#include <tuple>
 
 using namespace std;
 
-#define LMM_BATCH_SIZE 10000 // used for batch processing
+#define LMM_BATCH_SIZE 20000 // used for batch processing
 
 class FUNC_PARAM {
 
@@ -40,6 +44,8 @@ public:
   size_t e_mode;
 };
 
+typedef std::tuple<string,std::vector<double> > SnpNameValues;
+
 class LMM {
 
 public:
@@ -53,8 +59,6 @@ public:
   string path_out;
 
   string file_gene;
-  // WJA added
-  string file_oxford;
 
   // LMM related parameters
   double l_min;
@@ -91,17 +95,19 @@ public:
   void AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
                    const gsl_matrix *UtW, const gsl_vector *Utx,
                    const gsl_matrix *W, const gsl_vector *x);
-  void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
-                    const gsl_matrix *UtW, const gsl_vector *Uty,
-                    const gsl_matrix *W, const gsl_vector *y);
-  // WJA added.
-  void Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
-                   const gsl_matrix *UtW, const gsl_vector *Uty,
-                   const gsl_matrix *W, const gsl_vector *y);
+  void Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
+               const gsl_matrix *U, const gsl_vector *eval,
+               const gsl_matrix *UtW, const gsl_vector *Uty,
+               const gsl_matrix *W, const gsl_vector *y,
+               const set<string> gwasnps);
   void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
                      const gsl_matrix *UtW, const gsl_vector *Uty,
                      const gsl_matrix *W, const gsl_vector *y,
                      const set<string> gwasnps);
+  void AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                    const gsl_matrix *UtW, const gsl_vector *Uty,
+                    const gsl_matrix *W, const gsl_vector *y,
+                    const set<string> gwasnps);
   void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
                        const gsl_matrix *UtW, const gsl_vector *Uty,
                        const gsl_matrix *W, const gsl_vector *y,
diff --git a/src/logistic.cpp b/src/logistic.cpp
index 2dd0402..fd5f0d1 100644
--- a/src/logistic.cpp
+++ b/src/logistic.cpp
@@ -7,6 +7,7 @@
 #include <stdio.h>
 
 #include "logistic.h"
+#include "debug.h"
 
 // I need to bundle all the data that goes to the function to optimze
 // together.
@@ -42,12 +43,12 @@ double fLogit_mixed(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
   for (int i = 0; i < n; ++i) {
     double Xbetai = beta->data[0];
     int iParm = 1;
-    for (int k = 0; k < X->size2; ++k) {
+    for (size_t k = 0; k < X->size2; ++k) {
       if (gsl_matrix_int_get(X, i, k) > 0)
         Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
       iParm += nlev->data[k] - 1;
     }
-    for (int k = 0; k < (Xc->size2); ++k)
+    for (size_t k = 0; k < (Xc->size2); ++k)
       Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
     total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
   }
@@ -62,16 +63,16 @@ void logistic_mixed_pred(gsl_vector *beta,     // Vector of parameters
                                                // obs x Kc (NULL if not used).
                          gsl_vector *yhat) {   // Vector of prob. predicted by
                                                // the logistic
-  for (int i = 0; i < X->size1; ++i) {
+  for (size_t i = 0; i < X->size1; ++i) {
     double Xbetai = beta->data[0];
     int iParm = 1;
-    for (int k = 0; k < X->size2; ++k) {
+    for (size_t k = 0; k < X->size2; ++k) {
       if (gsl_matrix_int_get(X, i, k) > 0)
         Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
       iParm += nlev->data[k] - 1;
     }
     // Adding the continuous.
-    for (int k = 0; k < (Xc->size2); ++k)
+    for (size_t k = 0; k < (Xc->size2); ++k)
       Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
     yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
   }
@@ -135,7 +136,7 @@ void wgsl_mixed_optim_hessian(const gsl_vector *beta, void *params,
   int K = p->X->size2;
   int Kc = p->Xc->size2;
   int npar = beta->size;
-  gsl_vector *gn = gsl_vector_alloc(npar); // gn
+  gsl_vector *gn = gsl_vector_safe_alloc(npar); // gn
 
   // Intitialize Hessian out necessary ???
   gsl_matrix_set_zero(out);
@@ -191,11 +192,8 @@ void wgsl_mixed_optim_hessian(const gsl_vector *beta, void *params,
 }
 
 double wgsl_mixed_optim_f(gsl_vector *v, void *params) {
-  double mLogLik = 0;
   fix_parm_mixed_T *p = (fix_parm_mixed_T *)params;
-  mLogLik =
-      fLogit_mixed(v, p->X, p->nlev, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
-  return mLogLik;
+  return fLogit_mixed(v, p->X, p->nlev, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
 }
 
 // Compute both f and df together.
@@ -209,7 +207,7 @@ void wgsl_mixed_optim_fdf(gsl_vector *x, void *params, double *f,
 int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X,
                        gsl_vector_int *nlev, gsl_matrix *Xc, gsl_vector *y,
                        double lambdaL1, double lambdaL2) {
-  double mLogLik = 0;
+  // double mLogLik = 0;
   fix_parm_mixed_T p;
   int npar = beta->size;
   int iter = 0;
@@ -224,13 +222,13 @@ int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X,
   p.lambdaL2 = lambdaL2;
 
   // Initial fit.
-  mLogLik = wgsl_mixed_optim_f(beta, &p);
+  // auto mLogLik = wgsl_mixed_optim_f(beta, &p);
 
-  gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
-  gsl_vector *stBeta = gsl_vector_alloc(npar);    // Direction to move.
+  gsl_matrix *myH = gsl_matrix_safe_alloc(npar, npar); // Hessian matrix.
+  gsl_vector *stBeta = gsl_vector_safe_alloc(npar);    // Direction to move.
 
-  gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
-  gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+  gsl_vector *myG = gsl_vector_safe_alloc(npar); // Gradient.
+  gsl_vector *tau = gsl_vector_safe_alloc(npar); // tau for QR.
 
   for (iter = 0; iter < 100; iter++) {
     wgsl_mixed_optim_hessian(beta, &p, myH); // Calculate Hessian.
@@ -250,7 +248,7 @@ int logistic_mixed_fit(gsl_vector *beta, gsl_matrix_int *X,
   }
 
   // Final fit.
-  mLogLik = wgsl_mixed_optim_f(beta, &p);
+  // mLogLik = wgsl_mixed_optim_f(beta, &p);
 
   gsl_vector_free(tau);
   gsl_vector_free(stBeta);
@@ -298,7 +296,7 @@ double fLogit_cat(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
   for (int i = 0; i < n; ++i) {
     double Xbetai = beta->data[0];
     int iParm = 1;
-    for (int k = 0; k < X->size2; ++k) {
+    for (size_t k = 0; k < X->size2; ++k) {
       if (gsl_matrix_int_get(X, i, k) > 0)
         Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
       iParm += nlev->data[k] - 1;
@@ -314,10 +312,10 @@ void logistic_cat_pred(gsl_vector *beta,     // Vector of parameters
                        gsl_vector_int *nlev, // Vector with #categories
                        gsl_vector *yhat) {   // Vector of prob. predicted by
                                              // the logistic.
-  for (int i = 0; i < X->size1; ++i) {
+  for (size_t i = 0; i < X->size1; ++i) {
     double Xbetai = beta->data[0];
     int iParm = 1;
-    for (int k = 0; k < X->size2; ++k) {
+    for (size_t k = 0; k < X->size2; ++k) {
       if (gsl_matrix_int_get(X, i, k) > 0)
         Xbetai += beta->data[gsl_matrix_int_get(X, i, k) - 1 + iParm];
       iParm += nlev->data[k] - 1;
@@ -440,7 +438,7 @@ void wgsl_cat_optim_fdf(gsl_vector *x, void *params, double *f,
 
 int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
                      gsl_vector *y, double lambdaL1, double lambdaL2) {
-  double mLogLik = 0;
+  // double mLogLik = 0;
   fix_parm_cat_T p;
   int npar = beta->size;
   int iter = 0;
@@ -453,14 +451,16 @@ int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
   p.lambdaL1 = lambdaL1;
   p.lambdaL2 = lambdaL2;
 
+#ifdef _RPR_DEBUG_
   // Initial fit.
-  mLogLik = wgsl_cat_optim_f(beta, &p);
+  auto mLogLik = wgsl_cat_optim_f(beta, &p);
+#endif
 
-  gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
-  gsl_vector *stBeta = gsl_vector_alloc(npar);    // Direction to move.
+  gsl_matrix *myH = gsl_matrix_safe_alloc(npar, npar); // Hessian matrix.
+  gsl_vector *stBeta = gsl_vector_safe_alloc(npar);    // Direction to move.
 
-  gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
-  gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+  gsl_vector *myG = gsl_vector_safe_alloc(npar); // Gradient.
+  gsl_vector *tau = gsl_vector_safe_alloc(npar); // tau for QR.
 
   for (iter = 0; iter < 100; iter++) {
     wgsl_cat_optim_hessian(beta, &p, myH); // Calculate Hessian.
@@ -484,7 +484,7 @@ int logistic_cat_fit(gsl_vector *beta, gsl_matrix_int *X, gsl_vector_int *nlev,
   }
 
   // Final fit.
-  mLogLik = wgsl_cat_optim_f(beta, &p);
+  // mLogLik = wgsl_cat_optim_f(beta, &p);
 
   gsl_vector_free(tau);
   gsl_vector_free(stBeta);
@@ -507,7 +507,7 @@ typedef struct {
   double lambdaL2;
 } fix_parm_cont_T;
 
-double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y,
+double fLogit_cont(const gsl_vector *beta, const gsl_matrix *Xc, const gsl_vector *y,
                    double lambdaL1, double lambdaL2) {
   int n = y->size;
   int npar = beta->size;
@@ -531,7 +531,7 @@ double fLogit_cont(gsl_vector *beta, gsl_matrix *Xc, gsl_vector *y,
   for (int i = 0; i < n; ++i) {
     double Xbetai = beta->data[0];
     int iParm = 1;
-    for (int k = 0; k < (Xc->size2); ++k)
+    for (size_t k = 0; k < (Xc->size2); ++k)
       Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
     total += y->data[i] * Xbetai - gsl_sf_log_1plusx(gsl_sf_exp(Xbetai));
   }
@@ -544,17 +544,17 @@ void logistic_cont_pred(gsl_vector *beta,   // Vector of parameters
                                             // Nobs x Kc (NULL if not used).
                         gsl_vector *yhat) { // Vector of prob. predicted by
                                             // the logistic.
-  for (int i = 0; i < Xc->size1; ++i) {
+  for (size_t i = 0; i < Xc->size1; ++i) {
     double Xbetai = beta->data[0];
     int iParm = 1;
-    for (int k = 0; k < (Xc->size2); ++k)
+    for (size_t k = 0; k < (Xc->size2); ++k)
       Xbetai += gsl_matrix_get(Xc, i, k) * beta->data[iParm++];
     yhat->data[i] = 1 / (1 + gsl_sf_exp(-Xbetai));
   }
 }
 
 // The gradient of f, df = (df/dx, df/dy).
-void wgsl_cont_optim_df(const gsl_vector *beta, void *params, gsl_vector *out) {
+void wgsl_cont_optim_df(const gsl_vector *beta, const void *params, gsl_vector *out) {
   fix_parm_cont_T *p = (fix_parm_cont_T *)params;
   int n = p->y->size;
   int Kc = p->Xc->size2;
@@ -596,7 +596,7 @@ void wgsl_cont_optim_hessian(const gsl_vector *beta, void *params,
   int n = p->y->size;
   int Kc = p->Xc->size2;
   int npar = beta->size;
-  gsl_vector *gn = gsl_vector_alloc(npar); // gn.
+  gsl_vector *gn = gsl_vector_safe_alloc(npar); // gn.
 
   // Intitialize Hessian out necessary ???
 
@@ -639,7 +639,7 @@ void wgsl_cont_optim_hessian(const gsl_vector *beta, void *params,
   gsl_vector_free(gn);
 }
 
-double wgsl_cont_optim_f(gsl_vector *v, void *params) {
+double wgsl_cont_optim_f(const gsl_vector *v, const void *params) {
   double mLogLik = 0;
   fix_parm_cont_T *p = (fix_parm_cont_T *)params;
   mLogLik = fLogit_cont(v, p->Xc, p->y, p->lambdaL1, p->lambdaL2);
@@ -647,7 +647,7 @@ double wgsl_cont_optim_f(gsl_vector *v, void *params) {
 }
 
 // Compute both f and df together.
-void wgsl_cont_optim_fdf(gsl_vector *x, void *params, double *f,
+void wgsl_cont_optim_fdf(const gsl_vector *x, const void *params, double *f,
                          gsl_vector *df) {
   *f = wgsl_cont_optim_f(x, params);
   wgsl_cont_optim_df(x, params, df);
@@ -658,7 +658,6 @@ int logistic_cont_fit(gsl_vector *beta,
                                       // Nobs x Kc (NULL if not used).
                       gsl_vector *y, double lambdaL1, double lambdaL2) {
 
-  double mLogLik = 0;
   fix_parm_cont_T p;
   int npar = beta->size;
   int iter = 0;
@@ -670,14 +669,16 @@ int logistic_cont_fit(gsl_vector *beta,
   p.lambdaL1 = lambdaL1;
   p.lambdaL2 = lambdaL2;
 
+#ifdef _RPR_DEBUG_
   // Initial fit.
-  mLogLik = wgsl_cont_optim_f(beta, &p);
+  auto mLogLik = wgsl_cont_optim_f(beta, &p);
+#endif
 
-  gsl_matrix *myH = gsl_matrix_alloc(npar, npar); // Hessian matrix.
-  gsl_vector *stBeta = gsl_vector_alloc(npar);    // Direction to move.
+  gsl_matrix *myH = gsl_matrix_safe_alloc(npar, npar); // Hessian matrix.
+  gsl_vector *stBeta = gsl_vector_safe_alloc(npar);    // Direction to move.
 
-  gsl_vector *myG = gsl_vector_alloc(npar); // Gradient.
-  gsl_vector *tau = gsl_vector_alloc(npar); // tau for QR.
+  gsl_vector *myG = gsl_vector_safe_alloc(npar); // Gradient.
+  gsl_vector *tau = gsl_vector_safe_alloc(npar); // tau for QR.
 
   for (iter = 0; iter < 100; iter++) {
     wgsl_cont_optim_hessian(beta, &p, myH); // Calculate Hessian.
@@ -701,7 +702,7 @@ int logistic_cont_fit(gsl_vector *beta,
   }
 
   // Final fit.
-  mLogLik = wgsl_cont_optim_f(beta, &p);
+  // mLogLik = wgsl_cont_optim_f(beta, &p);
 
   gsl_vector_free(tau);
   gsl_vector_free(stBeta);
diff --git a/src/logistic.h b/src/logistic.h
index bebcbf6..c8c0cb3 100644
--- a/src/logistic.h
+++ b/src/logistic.h
@@ -58,8 +58,8 @@ int logistic_cont_fit(gsl_vector *beta, // Vector of parameters
                       double lambdaL1,  // Regularization L1, 0 if not used.
                       double lambdaL2); // Regularization L2, 0 if not used.
 
-double fLogit_cont(gsl_vector *beta,
-                   gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
-                   gsl_vector *y, double lambdaL1, double lambdaL2);
+double fLogit_cont(const gsl_vector *beta,
+                   const gsl_matrix *Xc, // Continuous covariates matrix Nobs x Kc.
+                   const gsl_vector *y, double lambdaL1, double lambdaL2);
 
 #endif
diff --git a/src/main.cpp b/src/main.cpp
index 92c4d90..706ac35 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -33,19 +33,23 @@ int main(int argc, char *argv[]) {
 
   if (argc <= 1) {
     cGemma.PrintHeader();
+    cGemma.PrintHelp(0);
     return EXIT_SUCCESS;
   }
   if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'h') {
+    cGemma.PrintHeader();
     cGemma.PrintHelp(0);
     return EXIT_SUCCESS;
   }
   if (argc == 3 && argv[1][0] == '-' && argv[1][1] == 'h') {
     string str;
     str.assign(argv[2]);
+    cGemma.PrintHeader();
     cGemma.PrintHelp(atoi(str.c_str()));
     return EXIT_SUCCESS;
   }
   if (argc == 2 && argv[1][0] == '-' && argv[1][1] == 'l') {
+    cGemma.PrintHeader();
     cGemma.PrintLicense();
     return EXIT_SUCCESS;
   }
@@ -57,11 +61,14 @@ int main(int argc, char *argv[]) {
     mkdir((cPar.path_out).c_str(), S_IRWXU | S_IRGRP | S_IROTH);
   }
 
+  if (!is_quiet_mode())
+    cGemma.PrintHeader();
+
   if (cPar.error == true) {
     return EXIT_FAILURE;
   }
 
-  if (cPar.mode_silence) {
+  if (is_quiet_mode()) {
     stringstream ss;
     cout.rdbuf(ss.rdbuf());
   }
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 4203837..9076c47 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -1,19 +1,21 @@
 /*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011-2017, Xiang Zhou
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #include <bitset>
@@ -32,7 +34,7 @@
 #include <tuple>
 #include <vector>
 
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
 
 #include "gsl/gsl_version.h"
 
@@ -40,6 +42,7 @@
 #pragma message "GSL version " GSL_VERSION
 #endif
 
+#include "gsl/gsl_sys.h" // for gsl_isnan, gsl_isinf, gsl_isfinite
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
 #include "gsl/gsl_linalg.h"
@@ -48,21 +51,49 @@
 #include "gsl/gsl_eigen.h"
 
 #include "debug.h"
-#include "eigenlib.h"
+// #include "eigenlib.h"
+#include "fastblas.h"
 #include "lapack.h"
 #include "mathfunc.h"
 
 using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
 
 bool has_nan(const vector<double> v) {
   for (const auto& e: v) {
-    if (std::isnan(e))
+    if (is_nan(e))
       return true;
   }
   return false;
 }
 
+bool has_nan(const gsl_vector *v) {
+  for (size_t i = 0; i < v->size; ++i)
+    if (gsl_isnan(gsl_vector_get(v,i))) return true;
+  return false;
+}
+bool has_inf(const gsl_vector *v) {
+  for (size_t i = 0; i < v->size; ++i) {
+    auto value = gsl_vector_get(v,i);
+    if (gsl_isinf(value) != 0) return true;
+  }
+  return false;
+}
+bool has_nan(const gsl_matrix *m) {
+  for (size_t i = 0; i < m->size1; ++i)
+    for (size_t j = 0; j < m->size2; ++j)
+      if (gsl_isnan(gsl_matrix_get(m,i,j))) return true;
+  return false;
+}
+bool has_inf(const gsl_matrix *m) {
+  for (size_t i = 0; i < m->size1; ++i)
+    for (size_t j = 0; j < m->size2; ++j) {
+      auto value = gsl_matrix_get(m,i,j);
+      if (gsl_isinf(value) != 0) return true;
+    }
+  return false;
+}
+
 // calculate variance of a vector
 double VectorVar(const gsl_vector *v) {
   double d, m = 0.0, m2 = 0.0;
@@ -79,8 +110,8 @@ double VectorVar(const gsl_vector *v) {
 // Center the matrix G.
 void CenterMatrix(gsl_matrix *G) {
   double d;
-  gsl_vector *w = gsl_vector_alloc(G->size1);
-  gsl_vector *Gw = gsl_vector_alloc(G->size1);
+  gsl_vector *w = gsl_vector_safe_alloc(G->size1);
+  gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
   gsl_vector_set_all(w, 1.0);
 
   gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -95,8 +126,8 @@ void CenterMatrix(gsl_matrix *G) {
     }
   }
 
-  gsl_vector_free(w);
-  gsl_vector_free(Gw);
+  gsl_vector_safe_free(w);
+  gsl_vector_safe_free(Gw);
 
   return;
 }
@@ -104,7 +135,7 @@ void CenterMatrix(gsl_matrix *G) {
 // Center the matrix G.
 void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
   double d, wtw;
-  gsl_vector *Gw = gsl_vector_alloc(G->size1);
+  gsl_vector *Gw = gsl_vector_safe_alloc(G->size1);
 
   gsl_blas_ddot(w, w, &wtw);
   gsl_blas_dgemv(CblasNoTrans, 1.0, G, w, 0.0, Gw);
@@ -119,19 +150,19 @@ void CenterMatrix(gsl_matrix *G, const gsl_vector *w) {
     }
   }
 
-  gsl_vector_free(Gw);
+  gsl_vector_safe_free(Gw);
 
   return;
 }
 
 // Center the matrix G.
 void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWi = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *WtWiWt = gsl_matrix_alloc(W->size2, G->size1);
-  gsl_matrix *GW = gsl_matrix_alloc(G->size1, W->size2);
-  gsl_matrix *WtGW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_matrix *Gtmp = gsl_matrix_alloc(G->size1, G->size1);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtWiWt = gsl_matrix_safe_alloc(W->size2, G->size1);
+  gsl_matrix *GW = gsl_matrix_safe_alloc(G->size1, W->size2);
+  gsl_matrix *WtGW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *Gtmp = gsl_matrix_safe_alloc(G->size1, G->size1);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
 
@@ -155,12 +186,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_matrix *W) {
 
   gsl_matrix_add(G, Gtmp);
 
-  gsl_matrix_free(WtW);
-  gsl_matrix_free(WtWi);
-  gsl_matrix_free(WtWiWt);
-  gsl_matrix_free(GW);
-  gsl_matrix_free(WtGW);
-  gsl_matrix_free(Gtmp);
+  gsl_matrix_safe_free(WtW);
+  gsl_matrix_safe_free(WtWi);
+  gsl_matrix_safe_free(WtWiWt);
+  gsl_matrix_safe_free(GW);
+  gsl_matrix_safe_free(WtGW);
+  gsl_matrix_safe_free(Gtmp);
 
   return;
 }
@@ -210,8 +241,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
   auto m = G->data;
   // upper triangle
   auto size = G->size1;
-  for(auto c = 0; c < size; c++) {
-    for(auto r = 0; r < c; r++) {
+  for(size_t c = 0; c < size; c++) {
+    for(size_t r = 0; r < c; r++) {
       int x1 = c, y1 = r, x2 = r, y2 = c;
       auto idx1 = y1*size+x1, idx2 = y2*size+x2;
       // printf("(%d,%d %f - %d,%d %f)",x1,y1,m[idx1],x2,y2,m[idx2]);
@@ -226,8 +257,8 @@ bool isMatrixSymmetric(const gsl_matrix *G) {
 
 bool isMatrixPositiveDefinite(const gsl_matrix *G) {
   enforce(G->size1 == G->size2);
-  auto G2 = gsl_matrix_alloc(G->size1, G->size2);
-  enforce_gsl(gsl_matrix_memcpy(G2,G));
+  auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
+  enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
   auto handler = gsl_set_error_handler_off();
 #if GSL_MAJOR_VERSION >= 2 && GSL_MINOR_VERSION >= 3
   auto s = gsl_linalg_cholesky_decomp1(G2);
@@ -235,20 +266,24 @@ bool isMatrixPositiveDefinite(const gsl_matrix *G) {
   auto s = gsl_linalg_cholesky_decomp(G2);
 #endif
   gsl_set_error_handler(handler);
+  if (s == GSL_SUCCESS) {
+    gsl_matrix_safe_free(G2);
+    return true;
+  }
   gsl_matrix_free(G2);
-  return (s == GSL_SUCCESS);
+  return (false);
 }
 
 gsl_vector *getEigenValues(const gsl_matrix *G) {
   enforce(G->size1 == G->size2);
-  auto G2 = gsl_matrix_alloc(G->size1, G->size2);
-  enforce_gsl(gsl_matrix_memcpy(G2,G));
+  auto G2 = gsl_matrix_safe_alloc(G->size1, G->size2);
+  enforce_gsl(gsl_matrix_safe_memcpy(G2,G));
   auto eworkspace = gsl_eigen_symm_alloc(G->size1);
   enforce(eworkspace);
-  gsl_vector *eigenvalues = gsl_vector_alloc(G->size1);
+  gsl_vector *eigenvalues = gsl_vector_safe_alloc(G->size1);
   enforce_gsl(gsl_eigen_symm(G2, eigenvalues, eworkspace));
   gsl_eigen_symm_free(eworkspace);
-  gsl_matrix_free(G2);
+  gsl_matrix_safe_free(G2);
   return eigenvalues;
 }
 
@@ -256,11 +291,27 @@ gsl_vector *getEigenValues(const gsl_matrix *G) {
 // by default 1E-5.
 // Returns success, eigen min, eigen min-but-1, eigen max
 
+tuple<double, double, double> minmax(const gsl_vector *v) {
+  auto min  = v->data[0];
+  auto min1 = min;
+  auto max  = min;
+  for (size_t i=1; i<v->size; i++) {
+    auto value = std::abs(v->data[i]);
+    if (value < min) {
+      min1 = min;
+      min = value;
+    }
+    if (value > max)
+      max = value;
+  }
+  return std::make_tuple(min, min1, max);
+}
+
 tuple<double, double, double> abs_minmax(const gsl_vector *v) {
   auto min  = std::abs(v->data[0]);
-  auto min1 = std::abs(v->data[0]);
-  auto max  = std::abs(v->data[0]);
-  for (auto i=0; i<v->size; i++) {
+  auto min1 = min;
+  auto max  = min;
+  for (size_t i=1; i<v->size; i++) {
     auto value = std::abs(v->data[i]);
     if (value < min) {
       min1 = min;
@@ -276,7 +327,7 @@ tuple<double, double, double> abs_minmax(const gsl_vector *v) {
 // the lowest value
 bool has_negative_values_but_one(const gsl_vector *v) {
   bool one_skipped = false;
-  for (auto i=0; i<v->size; i++) {
+  for (size_t i=0; i<v->size; i++) {
     if (v->data[i] < 0.0) {
       if (one_skipped)
         return true;
@@ -286,11 +337,12 @@ bool has_negative_values_but_one(const gsl_vector *v) {
   return false;
 }
 
-uint count_small_values(const gsl_vector *v, double min) {
+uint count_abs_small_values(const gsl_vector *v, double min) {
   uint count = 0;
-  for (auto i=0; i<v->size; i++) {
-    if (v->data[i] < min)
+  for (size_t i=0; i<v->size; i++) {
+    if (std::abs(v->data[i]) < min) {
       count += 1;
+    }
   }
   return count;
 }
@@ -298,24 +350,35 @@ uint count_small_values(const gsl_vector *v, double min) {
 // Check for matrix being ill conditioned by taking the eigen values
 // and the ratio of max and min but one (min is expected to be zero).
 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio) {
-  bool ret_valid = true;
-
   auto t = abs_minmax(eigenvalues);
   auto absmin = get<0>(t);
   auto absmin1 = get<1>(t);
   auto absmax = get<2>(t);
   if (absmax/absmin1 > max_ratio) {
     #if !NDEBUG
-    cerr << "**** DEBUG: Eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio " << absmax/absmin1 << endl;
+    cerr << "**** DEBUG: Ratio |eigenmax|/|eigenmin| suggests matrix is ill conditioned for double precision" << endl;
+    auto t = minmax(eigenvalues);
+    auto min = get<0>(t);
+    auto min1 = get<1>(t);
+    auto max = get<2>(t);
+    cerr << "**** DEBUG: Abs eigenvalues [Min " << absmin << ", " << absmin1 << " ... " << absmax << " Max] Ratio (" << absmax << "/" << absmin1 << ") = " << absmax/absmin1 << endl;
+    cerr << "**** DEBUG: Eigenvalues [Min " << min << ", " << min1 << " ... " << max << " Max]" << endl;
     #endif
-    ret_valid = false;
+    return true;
   }
-  return ret_valid;
+  return false;
+}
+
+double sum(const double *m, size_t rows, size_t cols) {
+  double sum = 0.0;
+  for (size_t i = 0; i<rows*cols; i++)
+    sum += m[i];
+  return sum;
 }
 
 double SumVector(const gsl_vector *v) {
   double sum = 0;
-  for (int i = 0; i < v->size; i++ ) {
+  for (size_t i = 0; i < v->size; i++ ) {
     sum += gsl_vector_get(v, i);
   }
   return( sum );
@@ -337,9 +400,9 @@ double CenterVector(gsl_vector *y) {
 
 // Center the vector y.
 void CenterVector(gsl_vector *y, const gsl_matrix *W) {
-  gsl_matrix *WtW = gsl_matrix_alloc(W->size2, W->size2);
-  gsl_vector *Wty = gsl_vector_alloc(W->size2);
-  gsl_vector *WtWiWty = gsl_vector_alloc(W->size2);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_vector *Wty = gsl_vector_safe_alloc(W->size2);
+  gsl_vector *WtWiWty = gsl_vector_safe_alloc(W->size2);
 
   gsl_blas_dgemm(CblasTrans, CblasNoTrans, 1.0, W, W, 0.0, WtW);
   gsl_blas_dgemv(CblasTrans, 1.0, W, y, 0.0, Wty);
@@ -351,9 +414,9 @@ void CenterVector(gsl_vector *y, const gsl_matrix *W) {
 
   gsl_blas_dgemv(CblasNoTrans, -1.0, W, WtWiWty, 1.0, y);
 
-  gsl_matrix_free(WtW);
-  gsl_vector_free(Wty);
-  gsl_vector_free(WtWiWty);
+  gsl_matrix_safe_free(WtW);
+  gsl_vector_safe_free(Wty);
+  gsl_vector_safe_free(WtWiWty);
 
   return;
 }
@@ -379,22 +442,18 @@ void StandardizeVector(gsl_vector *y) {
 
 // Calculate UtX.
 void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
-  gsl_matrix *X = gsl_matrix_alloc(UtX->size1, UtX->size2);
-  gsl_matrix_memcpy(X, UtX);
-  eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
-  gsl_matrix_free(X);
-
-  return;
+  gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
+  gsl_matrix_safe_memcpy(X, UtX);
+  fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
+  gsl_matrix_safe_free(X);
 }
 
 void CalcUtX(const gsl_matrix *U, const gsl_matrix *X, gsl_matrix *UtX) {
-  eigenlib_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
-  return;
+  fast_dgemm("T", "N", 1.0, U, X, 0.0, UtX);
 }
 
 void CalcUtX(const gsl_matrix *U, const gsl_vector *x, gsl_vector *Utx) {
   gsl_blas_dgemv(CblasTrans, 1.0, U, x, 0.0, Utx);
-  return;
 }
 
 // Kronecker product.
@@ -403,7 +462,7 @@ void Kronecker(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
     for (size_t j = 0; j < K->size2; j++) {
       gsl_matrix_view H_sub = gsl_matrix_submatrix(
           H, i * V->size1, j * V->size2, V->size1, V->size2);
-      gsl_matrix_memcpy(&H_sub.matrix, V);
+      gsl_matrix_safe_memcpy(&H_sub.matrix, V);
       gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
     }
   }
@@ -416,13 +475,13 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H) {
     for (size_t j = i; j < K->size2; j++) {
       gsl_matrix_view H_sub = gsl_matrix_submatrix(
           H, i * V->size1, j * V->size2, V->size1, V->size2);
-      gsl_matrix_memcpy(&H_sub.matrix, V);
+      gsl_matrix_safe_memcpy(&H_sub.matrix, V);
       gsl_matrix_scale(&H_sub.matrix, gsl_matrix_get(K, i, j));
 
       if (i != j) {
         gsl_matrix_view H_sub_sym = gsl_matrix_submatrix(
             H, j * V->size1, i * V->size2, V->size1, V->size2);
-        gsl_matrix_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
+        gsl_matrix_safe_memcpy(&H_sub_sym.matrix, &H_sub.matrix);
       }
     }
   }
@@ -520,6 +579,7 @@ unsigned char Double02ToUchar(const double dosage) {
   return (int)(dosage * 100);
 }
 
+/*
 void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
                           const size_t i_row, VectorXd &x_row) {
   if (i_row < X.size()) {
@@ -531,3 +591,5 @@ void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
     exit(1);
   }
 }
+
+*/
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 6e20b37..8258c22 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -1,25 +1,27 @@
 /*
- Genome-wide Efficient Mixed Model Association (GEMMA)
- Copyright (C) 2011-2017, Xiang Zhou
+    Genome-wide Efficient Mixed Model Association (GEMMA)
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
 
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- GNU General Public License for more details.
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
 
- You should have received a copy of the GNU General Public License
- along with this program.  If not, see <http://www.gnu.org/licenses/>.
+    You should have received a copy of the GNU General Public License
+    along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
 #ifndef __MATHFUNC_H__
 #define __MATHFUNC_H__
 
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
 
@@ -27,9 +29,16 @@
 #define EIGEN_MINVALUE 1e-10
 
 using namespace std;
-using namespace Eigen;
+
+inline bool is_nan(double f) {
+  return (std::isnan(f));
+}
 
 bool has_nan(const vector<double> v);
+bool has_nan(const gsl_vector *v);
+bool has_inf(const gsl_vector *v);
+bool has_nan(const gsl_matrix *m);
+bool has_inf(const gsl_matrix *m);
 
 double VectorVar(const gsl_vector *v);
 void CenterMatrix(gsl_matrix *G);
@@ -38,11 +47,12 @@ void CenterMatrix(gsl_matrix *G, const gsl_matrix *W);
 void StandardizeMatrix(gsl_matrix *G);
 double ScaleMatrix(gsl_matrix *G);
 bool has_negative_values_but_one(const gsl_vector *v);
-uint count_small_values(const gsl_vector *v, double min);
+uint count_abs_small_values(const gsl_vector *v, double min);
 bool isMatrixPositiveDefinite(const gsl_matrix *G);
 bool isMatrixIllConditioned(const gsl_vector *eigenvalues, double max_ratio=CONDITIONED_MAXRATIO);
 bool isMatrixSymmetric(const gsl_matrix *G);
 gsl_vector *getEigenValues(const gsl_matrix *G);
+double sum(const double *m, size_t rows, size_t cols);
 double SumVector(const gsl_vector *v);
 double CenterVector(gsl_vector *y);
 void CenterVector(gsl_vector *y, const gsl_matrix *W);
@@ -56,7 +66,7 @@ void KroneckerSym(const gsl_matrix *K, const gsl_matrix *V, gsl_matrix *H);
 
 double UcharToDouble02(const unsigned char c);
 unsigned char Double02ToUchar(const double dosage);
-void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
-                          const size_t i_row, VectorXd &x_row);
+// void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
+//                          const size_t i_row, Eigen::VectorXd &x_row);
 
 #endif
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index c5efb6e..eee562d 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -31,14 +31,13 @@
 
 #include "gsl/gsl_blas.h"
 #include "gsl/gsl_cdf.h"
-#include "gsl/gsl_integration.h"
 #include "gsl/gsl_linalg.h"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
 
-#include "eigenlib.h"
+#include "fastblas.h"
 #include "gzstream.h"
 #include "io.h"
 #include "lapack.h"
@@ -54,7 +53,6 @@ void MVLMM::CopyFromParam(PARAM &cPar) {
 
   file_bfile = cPar.file_bfile;
   file_geno = cPar.file_geno;
-  file_oxford = cPar.file_oxford;
   file_out = cPar.file_out;
   path_out = cPar.path_out;
 
@@ -719,7 +717,7 @@ double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
                 gsl_matrix *Vbeta) {
   size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
   size_t dc_size = d_size * c_size;
-  double delta, dl, d, d1, d2, dy, dx, dw, logdet_Ve, logdet_Q, p_value;
+  double delta, dl, d, d1, d2, dy, dx, dw; //  logdet_Ve, logdet_Q, p_value;
 
   gsl_vector *D_l = gsl_vector_alloc(d_size);
   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
@@ -738,10 +736,12 @@ double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
   gsl_vector_set_zero(WHiy);
 
   // Eigen decomposition and calculate log|Ve|.
-  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+  // double logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+  EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
 
   // Calculate Qi and log|Q|.
-  logdet_Q = CalcQi(eval, D_l, W, Qi);
+  // double logdet_Q = CalcQi(eval, D_l, W, Qi);
+  CalcQi(eval, D_l, W, Qi);
 
   // Calculate UltVehiY.
   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
@@ -799,7 +799,7 @@ double MphCalcP(const gsl_vector *eval, const gsl_vector *x_vec,
   // Calculate test statistic and p value.
   gsl_blas_ddot(D_l, xPy, &d);
 
-  p_value = gsl_cdf_chisq_Q(d, (double)d_size);
+  double p_value = gsl_cdf_chisq_Q(d, (double)d_size);
 
   gsl_vector_free(D_l);
   gsl_matrix_free(UltVeh);
@@ -825,7 +825,7 @@ void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W,
                  gsl_matrix *se_B) {
   size_t n_size = eval->size, c_size = W->size1, d_size = V_g->size1;
   size_t dc_size = d_size * c_size;
-  double delta, dl, d, dy, dw, logdet_Ve, logdet_Q;
+  double delta, dl, d, dy, dw; // , logdet_Ve, logdet_Q;
 
   gsl_vector *D_l = gsl_vector_alloc(d_size);
   gsl_matrix *UltVeh = gsl_matrix_alloc(d_size, d_size);
@@ -840,10 +840,12 @@ void MphCalcBeta(const gsl_vector *eval, const gsl_matrix *W,
   gsl_vector_set_zero(WHiy);
 
   // Eigen decomposition and calculate log|Ve|.
-  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+  // double logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+  EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
 
   // Calculate Qi and log|Q|.
-  logdet_Q = CalcQi(eval, D_l, W, Qi);
+  // double logdet_Q = CalcQi(eval, D_l, W, Qi);
+  CalcQi(eval, D_l, W, Qi);
 
   // Calculate UltVehiY.
   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
@@ -2878,13 +2880,15 @@ void MphInitial(const size_t em_iter, const double em_prec,
 
   gsl_vector_set_zero(XHiy);
 
-  double logdet_Ve, logdet_Q, dl, d, delta, dx, dy;
+  double dl, d, delta, dx, dy;
 
   // Eigen decomposition and calculate log|Ve|.
-  logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+  // double logdet_Ve = EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
+  EigenProc(V_g, V_e, D_l, UltVeh, UltVehi);
 
   // Calculate Qi and log|Q|.
-  logdet_Q = CalcQi(eval, D_l, X, Qi);
+  // double logdet_Q = CalcQi(eval, D_l, X, Qi);
+  CalcQi(eval, D_l, X, Qi);
 
   // Calculate UltVehiY.
   gsl_blas_dgemm(CblasNoTrans, CblasNoTrans, 1.0, UltVehi, Y, 0.0, UltVehiY);
@@ -2950,556 +2954,6 @@ double PCRT(const size_t mode, const size_t d_size, const double p_value,
   return p_crt;
 }
 
-// WJA added.
-void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
-                        const gsl_matrix *UtW, const gsl_matrix *UtY) {
-  debug_msg("entering");
-  string file_bgen = file_oxford + ".bgen";
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return;
-  }
-
-  clock_t time_start = clock();
-  time_UtX = 0;
-  time_opt = 0;
-
-  string line;
-
-  // Create a large matrix.
-  size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
-
-  double logl_H0 = 0.0, logl_H1 = 0.0, p_wald = 0, p_lrt = 0, p_score = 0;
-  double crt_a, crt_b, crt_c;
-  int n_miss, c_phen;
-  double geno, x_mean;
-  size_t c = 0;
-  size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
-
-  size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
-
-  // Large matrices for EM.
-  gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *E_hat = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *OmegaU = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *OmegaE = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *UltVehiY = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *UltVehiBX = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *UltVehiU = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *UltVehiE = gsl_matrix_alloc(d_size, n_size);
-
-  // Large matrices for NR. Each dxd block is H_k^{-1}.
-  gsl_matrix *Hi_all = gsl_matrix_alloc(d_size, d_size * n_size);
-
-  // Each column is H_k^{-1}y_k.
-  gsl_matrix *Hiy_all = gsl_matrix_alloc(d_size, n_size);
-
-  // Each dcxdc block is x_k\otimes H_k^{-1}.
-  gsl_matrix *xHi_all = gsl_matrix_alloc(dc_size, d_size * n_size);
-  gsl_matrix *Hessian = gsl_matrix_alloc(v_size * 2, v_size * 2);
-  gsl_vector *x = gsl_vector_alloc(n_size);
-  gsl_vector *x_miss = gsl_vector_alloc(n_size);
-
-  gsl_matrix *Y = gsl_matrix_alloc(d_size, n_size);
-  gsl_matrix *X = gsl_matrix_alloc(c_size + 1, n_size);
-  gsl_matrix *V_g = gsl_matrix_alloc(d_size, d_size);
-  gsl_matrix *V_e = gsl_matrix_alloc(d_size, d_size);
-  gsl_matrix *B = gsl_matrix_alloc(d_size, c_size + 1);
-  gsl_vector *beta = gsl_vector_alloc(d_size);
-  gsl_matrix *Vbeta = gsl_matrix_alloc(d_size, d_size);
-
-  // Null estimates for initial values.
-  gsl_matrix *V_g_null = gsl_matrix_alloc(d_size, d_size);
-  gsl_matrix *V_e_null = gsl_matrix_alloc(d_size, d_size);
-  gsl_matrix *B_null = gsl_matrix_alloc(d_size, c_size + 1);
-  gsl_matrix *se_B_null = gsl_matrix_alloc(d_size, c_size);
-
-  gsl_matrix_view X_sub = gsl_matrix_submatrix(X, 0, 0, c_size, n_size);
-  gsl_matrix_view B_sub = gsl_matrix_submatrix(B, 0, 0, d_size, c_size);
-  gsl_matrix_view xHi_all_sub =
-      gsl_matrix_submatrix(xHi_all, 0, 0, d_size * c_size, d_size * n_size);
-
-  gsl_matrix_transpose_memcpy(Y, UtY);
-
-  gsl_matrix_transpose_memcpy(&X_sub.matrix, UtW);
-
-  gsl_vector_view X_row = gsl_matrix_row(X, c_size);
-  gsl_vector_set_zero(&X_row.vector);
-  gsl_vector_view B_col = gsl_matrix_column(B, c_size);
-  gsl_vector_set_zero(&B_col.vector);
-
-  MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, &X_sub.matrix, Y, l_min,
-             l_max, n_region, V_g, V_e, &B_sub.matrix);
-  logl_H0 = MphEM('R', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
-                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
-                  V_e, &B_sub.matrix);
-  logl_H0 = MphNR('R', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
-                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
-                  crt_c);
-  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
-              se_B_null);
-
-  c = 0;
-  Vg_remle_null.clear();
-  Ve_remle_null.clear();
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = i; j < d_size; j++) {
-      Vg_remle_null.push_back(gsl_matrix_get(V_g, i, j));
-      Ve_remle_null.push_back(gsl_matrix_get(V_e, i, j));
-      VVg_remle_null.push_back(gsl_matrix_get(Hessian, c, c));
-      VVe_remle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
-      c++;
-    }
-  }
-  beta_remle_null.clear();
-  se_beta_remle_null.clear();
-  for (size_t i = 0; i < se_B_null->size1; i++) {
-    for (size_t j = 0; j < se_B_null->size2; j++) {
-      beta_remle_null.push_back(gsl_matrix_get(B, i, j));
-      se_beta_remle_null.push_back(gsl_matrix_get(se_B_null, i, j));
-    }
-  }
-  logl_remle_H0 = logl_H0;
-
-  cout.setf(std::ios_base::fixed, std::ios_base::floatfield);
-  cout.precision(4);
-
-  cout << "REMLE estimate for Vg in the null model: " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      cout << gsl_matrix_get(V_g, i, j) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "se(Vg): " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      c = GetIndex(i, j, d_size);
-      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "REMLE estimate for Ve in the null model: " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      cout << gsl_matrix_get(V_e, i, j) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "se(Ve): " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      c = GetIndex(i, j, d_size);
-      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "REMLE likelihood = " << logl_H0 << endl;
-
-  logl_H0 = MphEM('L', em_iter, em_prec, eval, &X_sub.matrix, Y, U_hat, E_hat,
-                  OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g,
-                  V_e, &B_sub.matrix);
-  logl_H0 = MphNR('L', nr_iter, nr_prec, eval, &X_sub.matrix, Y, Hi_all,
-                  &xHi_all_sub.matrix, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b,
-                  crt_c);
-  MphCalcBeta(eval, &X_sub.matrix, Y, V_g, V_e, UltVehiY, &B_sub.matrix,
-              se_B_null);
-
-  c = 0;
-  Vg_mle_null.clear();
-  Ve_mle_null.clear();
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = i; j < d_size; j++) {
-      Vg_mle_null.push_back(gsl_matrix_get(V_g, i, j));
-      Ve_mle_null.push_back(gsl_matrix_get(V_e, i, j));
-      VVg_mle_null.push_back(gsl_matrix_get(Hessian, c, c));
-      VVe_mle_null.push_back(gsl_matrix_get(Hessian, c + v_size, c + v_size));
-      c++;
-    }
-  }
-  beta_mle_null.clear();
-  se_beta_mle_null.clear();
-  for (size_t i = 0; i < se_B_null->size1; i++) {
-    for (size_t j = 0; j < se_B_null->size2; j++) {
-      beta_mle_null.push_back(gsl_matrix_get(B, i, j));
-      se_beta_mle_null.push_back(gsl_matrix_get(se_B_null, i, j));
-    }
-  }
-  logl_mle_H0 = logl_H0;
-
-  cout << "MLE estimate for Vg in the null model: " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      cout << gsl_matrix_get(V_g, i, j) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "se(Vg): " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      c = GetIndex(i, j, d_size);
-      cout << sqrt(gsl_matrix_get(Hessian, c, c)) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "MLE estimate for Ve in the null model: " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      cout << gsl_matrix_get(V_e, i, j) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "se(Ve): " << endl;
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = 0; j <= i; j++) {
-      c = GetIndex(i, j, d_size);
-      cout << sqrt(gsl_matrix_get(Hessian, c + v_size, c + v_size)) << "\t";
-    }
-    cout << endl;
-  }
-  cout << "MLE likelihood = " << logl_H0 << endl;
-
-  vector<double> v_beta, v_Vg, v_Ve, v_Vbeta;
-  for (size_t i = 0; i < d_size; i++) {
-    v_beta.push_back(0.0);
-  }
-  for (size_t i = 0; i < d_size; i++) {
-    for (size_t j = i; j < d_size; j++) {
-      v_Vg.push_back(0.0);
-      v_Ve.push_back(0.0);
-      v_Vbeta.push_back(0.0);
-    }
-  }
-
-  gsl_matrix_memcpy(V_g_null, V_g);
-  gsl_matrix_memcpy(V_e_null, V_e);
-  gsl_matrix_memcpy(B_null, B);
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-
-  infile.ignore(bgen_snp_block_offset);
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-  double bgen_geno_prob_non_miss;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-  string id;
-  string rs;
-  string chr;
-  std::cout << "Warning: WJA hard coded SNP missingness threshold "
-            << "of 10%" << std::endl;
-
-  // Start reading genotypes and analyze.
-  size_t csnp = 0, t_last = 0;
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-    t_last++;
-  }
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
-    }
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-
-    // Read SNP header.
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (indicator_snp[t] == 0) {
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    x_mean = 0.0;
-    c_phen = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(x_miss);
-    for (size_t i = 0; i < bgen_N; ++i) {
-      if (indicator_idv[i] == 0) {
-        continue;
-      }
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
-      // WJA.
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
-        n_miss++;
-      } else {
-
-        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
-      }
-      c_phen++;
-    }
-
-    x_mean /= static_cast<double>(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
-        gsl_vector_set(x, i, x_mean);
-      }
-    }
-
-    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, csnp % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
-    csnp++;
-
-    if (csnp % msize == 0 || csnp == t_last) {
-      size_t l = 0;
-      if (csnp % msize == 0) {
-        l = msize;
-      } else {
-        l = csnp % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
-
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(&X_row.vector, &UtXlarge_col.vector);
-
-        // Initial values.
-        gsl_matrix_memcpy(V_g, V_g_null);
-        gsl_matrix_memcpy(V_e, V_e_null);
-        gsl_matrix_memcpy(B, B_null);
-
-        time_start = clock();
-
-        // 3 is before 1.
-        if (a_mode == 3 || a_mode == 4) {
-          p_score = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g_null,
-                             V_e_null, UltVehiY, beta, Vbeta);
-          if (p_score < p_nr && crt == 1) {
-            logl_H1 = MphNR('R', 1, nr_prec * 10, eval, X, Y, Hi_all, xHi_all,
-                            Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-            p_score = PCRT(3, d_size, p_score, crt_a, crt_b, crt_c);
-          }
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          logl_H1 = MphEM('L', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
-                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-                          UltVehiE, V_g, V_e, B);
-
-          // Calculate beta and Vbeta.
-          p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
-                           UltVehiY, beta, Vbeta);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
-
-          if (p_lrt < p_nr) {
-            logl_H1 =
-                MphNR('L', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
-                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-
-            // Calculate beta and Vbeta.
-            p_lrt = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
-                             UltVehiY, beta, Vbeta);
-            p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_H0), (double)d_size);
-
-            if (crt == 1) {
-              p_lrt = PCRT(2, d_size, p_lrt, crt_a, crt_b, crt_c);
-            }
-          }
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          logl_H1 = MphEM('R', em_iter / 10, em_prec * 10, eval, X, Y, U_hat,
-                          E_hat, OmegaU, OmegaE, UltVehiY, UltVehiBX, UltVehiU,
-                          UltVehiE, V_g, V_e, B);
-          p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
-                            UltVehiY, beta, Vbeta);
-
-          if (p_wald < p_nr) {
-            logl_H1 =
-                MphNR('R', nr_iter / 10, nr_prec * 10, eval, X, Y, Hi_all,
-                      xHi_all, Hiy_all, V_g, V_e, Hessian, crt_a, crt_b, crt_c);
-            p_wald = MphCalcP(eval, &X_row.vector, &X_sub.matrix, Y, V_g, V_e,
-                              UltVehiY, beta, Vbeta);
-
-            if (crt == 1) {
-              p_wald = PCRT(1, d_size, p_wald, crt_a, crt_b, crt_c);
-            }
-          }
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        for (size_t i = 0; i < d_size; i++) {
-          v_beta[i] = gsl_vector_get(beta, i);
-        }
-
-        c = 0;
-        for (size_t i = 0; i < d_size; i++) {
-          for (size_t j = i; j < d_size; j++) {
-            v_Vg[c] = gsl_matrix_get(V_g, i, j);
-            v_Ve[c] = gsl_matrix_get(V_e, i, j);
-            v_Vbeta[c] = gsl_matrix_get(Vbeta, i, j);
-            c++;
-          }
-        }
-
-        MPHSUMSTAT SNPs = {v_beta, p_wald, p_lrt, p_score, v_Vg, v_Ve, v_Vbeta};
-        sumStat.push_back(SNPs);
-      }
-    }
-  }
-  cout << endl;
-
-  infile.close();
-  infile.clear();
-
-  gsl_matrix_free(U_hat);
-  gsl_matrix_free(E_hat);
-  gsl_matrix_free(OmegaU);
-  gsl_matrix_free(OmegaE);
-  gsl_matrix_free(UltVehiY);
-  gsl_matrix_free(UltVehiBX);
-  gsl_matrix_free(UltVehiU);
-  gsl_matrix_free(UltVehiE);
-
-  gsl_matrix_free(Hi_all);
-  gsl_matrix_free(Hiy_all);
-  gsl_matrix_free(xHi_all);
-  gsl_matrix_free(Hessian);
-
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-
-  gsl_matrix_free(Y);
-  gsl_matrix_free(X);
-  gsl_matrix_free(V_g);
-  gsl_matrix_free(V_e);
-  gsl_matrix_free(B);
-  gsl_vector_free(beta);
-  gsl_matrix_free(Vbeta);
-
-  gsl_matrix_free(V_g_null);
-  gsl_matrix_free(V_e_null);
-  gsl_matrix_free(B_null);
-  gsl_matrix_free(se_B_null);
-
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
-
-  return;
-}
-
 void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
                           const gsl_matrix *UtW, const gsl_matrix *UtY) {
   debug_msg("entering");
@@ -3739,24 +3193,24 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
     t_last++;
   }
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+      ProgressBar("Reading SNPs", t, ns_total - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     x_mean = 0.0;
     c_phen = 0;
     n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -3801,8 +3255,8 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
           gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
       time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
+      fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                 &UtXlarge_sub.matrix);
       time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
       gsl_matrix_set_zero(Xlarge);
@@ -4190,7 +3644,7 @@ void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
   }
   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+      ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
@@ -4268,7 +3722,7 @@ void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
           gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
 
       time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+      fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
                      &UtXlarge_sub.matrix);
       time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
@@ -4416,7 +3870,7 @@ void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
   size_t n_size = UtY->size1, d_size = UtY->size2, c_size = UtW->size2;
   size_t dc_size = d_size * c_size, v_size = d_size * (d_size + 1) / 2;
 
-  double logl, crt_a, crt_b, crt_c;
+  double crt_a, crt_b, crt_c;
 
   // Large matrices for EM.
   gsl_matrix *U_hat = gsl_matrix_alloc(d_size, n_size);
@@ -4448,10 +3902,10 @@ void CalcMvLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
   // Initial, EM, NR, and calculate B.
   MphInitial(em_iter, em_prec, nr_iter, nr_prec, eval, W, Y, l_min, l_max,
              n_region, V_g, V_e, B);
-  logl = MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE,
-               UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
-  logl = MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g,
-               V_e, Hessian, crt_a, crt_b, crt_c);
+  MphEM('R', em_iter, em_prec, eval, W, Y, U_hat, E_hat, OmegaU, OmegaE,
+        UltVehiY, UltVehiBX, UltVehiU, UltVehiE, V_g, V_e, B);
+  MphNR('R', nr_iter, nr_prec, eval, W, Y, Hi_all, xHi_all, Hiy_all, V_g,
+        V_e, Hessian, crt_a, crt_b, crt_c);
   MphCalcBeta(eval, W, Y, V_g, V_e, UltVehiY, B, se_B);
 
   // Free matrices.
@@ -4716,24 +4170,24 @@ void MVLMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
 
   // Start reading genotypes and analyze.
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+      ProgressBar("Reading SNPs", t, ns_total - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     x_mean = 0.0;
     c_phen = 0;
     n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -5175,7 +4629,7 @@ void MVLMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
 
   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+      ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
diff --git a/src/param.cpp b/src/param.cpp
index 3b319e9..bf6c195 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -16,12 +18,13 @@
     along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 
+#include <iostream>
+#include <iomanip>
+#include <string>
 #include <algorithm>
 #include <cmath>
 #include <cstring>
 #include <fstream>
-#include <iostream>
-#include <string>
 #include <sys/stat.h>
 
 #include "gsl/gsl_blas.h"
@@ -66,7 +69,7 @@ void LOCO_set_Snps(set<string> &ksnps, set<string> &gwasnps,
 // (indicator_idv[x] == 1). This should match indicator_cvt etc. If
 // this gives problems with certain sets we can simply trim to size.
 
-void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
+void trim_individuals(vector<int> &idvs, size_t ni_max) {
   if (ni_max) {
     size_t count = 0;
     for (auto ind = idvs.begin(); ind != idvs.end(); ++ind) {
@@ -76,7 +79,7 @@ void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
         break;
     }
     if (count != idvs.size()) {
-      if (debug)
+      if (is_debug_mode())
         cout << "**** TEST MODE: trim individuals from " << idvs.size()
              << " to " << count << endl;
       idvs.resize(count);
@@ -87,7 +90,7 @@ void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
 // ---- PARAM class implementation
 
 PARAM::PARAM(void)
-    : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000),
+    : a_mode(0), k_mode(1), d_pace(DEFAULT_PACE),
       file_out("result"), path_out("./output/"), miss_level(0.05),
       maf_level(0.01), hwe_level(0), r2_level(0.9999), l_min(1e-5), l_max(1e5),
       n_region(10), p_nr(0.001), em_prec(0.0001), nr_prec(0.0001),
@@ -97,7 +100,7 @@ PARAM::PARAM(void)
       rho_ngrid(10), s_min(0), s_max(300), w_step(100000), s_step(1000000),
       r_pace(10), w_pace(1000), n_accept(0), n_mh(10), geo_mean(2000.0),
       randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200),
-      error(false), ni_subsample(0), n_cvt(1), n_vc(1), n_cat(0),
+      error(false), ni_subsample(0), n_cvt(1), n_cat(0), n_vc(1),
       time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
       time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {}
 
@@ -221,7 +224,7 @@ void PARAM::ReadFiles(void) {
   } else {
     n_cvt = 1;
   }
-  trim_individuals(indicator_cvt, ni_max, mode_debug);
+  trim_individuals(indicator_cvt, ni_max);
 
   if (!file_gxe.empty()) {
     if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) {
@@ -234,38 +237,7 @@ void PARAM::ReadFiles(void) {
     }
   }
 
-  trim_individuals(indicator_idv, ni_max, mode_debug);
-
-  // WJA added.
-  // Read genotype and phenotype file for bgen format.
-  if (!file_oxford.empty()) {
-    file_str = file_oxford + ".sample";
-    if (ReadFile_sample(file_str, indicator_pheno, pheno, p_column,
-                        indicator_cvt, cvt, n_cvt) == false) {
-      error = true;
-    }
-    if ((indicator_cvt).size() == 0) {
-      n_cvt = 1;
-    }
-
-    // Post-process covariates and phenotypes, obtain
-    // ni_test, save all useful covariates.
-    ProcessCvtPhen();
-
-    // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
-
-    file_str = file_oxford + ".bgen";
-    if (ReadFile_bgen(file_str, setSnps, W, indicator_idv, indicator_snp,
-                      snpInfo, maf_level, miss_level, hwe_level, r2_level,
-                      ns_test) == false) {
-      error = true;
-    }
-    gsl_matrix_free(W);
-
-    ns_total = indicator_snp.size();
-  }
+  trim_individuals(indicator_idv, ni_max);
 
   // Read genotype and phenotype file for PLINK format.
   if (!file_bfile.empty()) {
@@ -297,16 +269,16 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    auto W1 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+    CopyCvt(W1);
 
     file_str = file_bfile + ".bed";
-    if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+    if (ReadFile_bed(file_str, setSnps, W1, indicator_idv, indicator_snp,
                      snpInfo, maf_level, miss_level, hwe_level, r2_level,
                      ns_test) == false) {
       error = true;
     }
-    gsl_matrix_free(W);
+    gsl_matrix_free(W1);
     ns_total = indicator_snp.size();
   }
 
@@ -330,17 +302,17 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    auto W2 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+    CopyCvt(W2);
 
-    trim_individuals(indicator_idv, ni_max, mode_debug);
-    trim_individuals(indicator_cvt, ni_max, mode_debug);
-    if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
+    trim_individuals(indicator_idv, ni_max);
+    trim_individuals(indicator_cvt, ni_max);
+    if (ReadFile_geno(file_geno, setSnps, W2, indicator_idv, indicator_snp,
                       maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
-                      mapRS2bp, mapRS2cM, snpInfo, ns_test, mode_debug) == false) {
+                      mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
       error = true;
     }
-    gsl_matrix_free(W);
+    gsl_matrix_free(W2);
 
     ns_total = indicator_snp.size();
   }
@@ -356,7 +328,7 @@ void PARAM::ReadFiles(void) {
 
     string file_name;
     size_t t = 0, ns_test_tmp = 0;
-    gsl_matrix *W;
+    gsl_matrix *W3 = NULL;
     while (!safeGetline(infile, file_name).eof()) {
       file_str = file_name + ".bim";
 
@@ -388,12 +360,12 @@ void PARAM::ReadFiles(void) {
         ProcessCvtPhen();
 
         // Obtain covariate matrix.
-        W = gsl_matrix_alloc(ni_test, n_cvt);
-        CopyCvt(W);
+        W3 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+        CopyCvt(W3);
       }
 
       file_str = file_name + ".bed";
-      if (ReadFile_bed(file_str, setSnps, W, indicator_idv, indicator_snp,
+      if (ReadFile_bed(file_str, setSnps, W3, indicator_idv, indicator_snp,
                        snpInfo, maf_level, miss_level, hwe_level, r2_level,
                        ns_test_tmp) == false) {
         error = true;
@@ -406,7 +378,7 @@ void PARAM::ReadFiles(void) {
       t++;
     }
 
-    gsl_matrix_free(W);
+    if (W3) gsl_matrix_free(W3);
 
     infile.close();
     infile.clear();
@@ -432,8 +404,8 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    gsl_matrix *W4 = gsl_matrix_safe_alloc(ni_test, n_cvt);
+    CopyCvt(W4);
 
     igzstream infile(file_mgeno.c_str(), igzstream::in);
     if (!infile) {
@@ -445,9 +417,9 @@ void PARAM::ReadFiles(void) {
     string file_name;
     size_t ns_test_tmp;
     while (!safeGetline(infile, file_name).eof()) {
-      if (ReadFile_geno(file_name, setSnps, W, indicator_idv, indicator_snp,
+      if (ReadFile_geno(file_name, setSnps, W4, indicator_idv, indicator_snp,
                         maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
-                        mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp, mode_debug) == false) {
+                        mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
         error = true;
       }
 
@@ -457,7 +429,7 @@ void PARAM::ReadFiles(void) {
       ns_total += indicator_snp.size();
     }
 
-    gsl_matrix_free(W);
+    gsl_matrix_free(W4);
 
     infile.close();
     infile.clear();
@@ -485,8 +457,8 @@ void PARAM::ReadFiles(void) {
     ProcessCvtPhen();
 
     // Obtain covariate matrix.
-    gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
-    CopyCvt(W);
+    // gsl_matrix *W5 = gsl_matrix_alloc(ni_test, n_cvt);
+    // CopyCvt(W5);
 
     if (ReadFile_gene(file_gene, vec_read, snpInfo, ng_total) == false) {
       error = true;
@@ -741,19 +713,6 @@ void PARAM::CheckParam(void) {
     }
   }
 
-  if (!file_oxford.empty()) {
-    str = file_oxford + ".bgen";
-    if (stat(str.c_str(), &fileInfo) == -1) {
-      cout << "error! fail to open .bgen file: " << str << endl;
-      error = true;
-    }
-    str = file_oxford + ".sample";
-    if (stat(str.c_str(), &fileInfo) == -1) {
-      cout << "error! fail to open .sample file: " << str << endl;
-      error = true;
-    }
-  }
-
   if ((!file_geno.empty() || !file_gene.empty())) {
     str = file_pheno;
     if (stat(str.c_str(), &fileInfo) == -1) {
@@ -864,11 +823,6 @@ void PARAM::CheckParam(void) {
     flag++;
   }
 
-  // WJA added.
-  if (!file_oxford.empty()) {
-    flag++;
-  }
-
   if (flag != 1 && a_mode != 15 && a_mode != 27 && a_mode != 28 &&
       a_mode != 43 && a_mode != 5 && a_mode != 61 && a_mode != 62 &&
       a_mode != 63 && a_mode != 66 && a_mode != 67) {
@@ -942,14 +896,12 @@ void PARAM::CheckParam(void) {
   enforce_fexists(file_snps, "open file");
   enforce_fexists(file_ksnps, "open file");
   enforce_fexists(file_gwasnps, "open file");
-  enforce_fexists(file_log, "open file");
   enforce_fexists(file_anno, "open file");
 
   if (!loco.empty()) {
     enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 21 || a_mode == 22,
                 "LOCO only works with LMM and K");
-    enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
-    enforce_msg(file_oxford.empty(), "LOCO does not work with Oxford (yet)");
+    // enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
     enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)");
     enforce_msg(!file_anno.empty(),
                 "LOCO requires annotation file (-a switch)");
@@ -957,54 +909,15 @@ void PARAM::CheckParam(void) {
     enforce_msg(file_gwasnps.empty(), "LOCO does not allow -gwasnps switch");
   }
 
-  str = file_kin;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open relatedness matrix file: " << str << endl;
-    error = true;
-  }
-
-  str = file_mk;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open relatedness matrix file: " << str << endl;
-    error = true;
-  }
-
-  str = file_cvt;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open covariates file: " << str << endl;
-    error = true;
-  }
-
-  str = file_gxe;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open environmental covariate file: " << str << endl;
-    error = true;
-  }
-
-  str = file_weight;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open the residual weight file: " << str << endl;
-    error = true;
-  }
-
-  str = file_epm;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open estimated parameter file: " << str << endl;
-    error = true;
-  }
-
-  str = file_ebv;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open estimated breeding value file: " << str
-         << endl;
-    error = true;
-  }
-
-  str = file_read;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open total read file: " << str << endl;
-    error = true;
-  }
+  enforce_fexists(file_kin, "open file");
+  enforce_fexists(file_mk, "open file");
+  enforce_fexists(file_cvt, "open file");
+  enforce_fexists(file_gxe, "open file");
+  enforce_fexists(file_log, "open file");
+  enforce_fexists(file_weight, "open file");
+  enforce_fexists(file_epm, "open file");
+  enforce_fexists(file_ebv, "open file");
+  enforce_fexists(file_read, "open file");
 
   // Check if files are compatible with analysis mode.
   if (k_mode == 2 && !file_geno.empty()) {
@@ -1056,14 +969,6 @@ void PARAM::CheckParam(void) {
 
 void PARAM::CheckData(void) {
 
-  // WJA NOTE: I added this condition so that covariates can be added
-  // through sample, probably not exactly what is wanted.
-  if (file_oxford.empty()) {
-    if ((file_cvt).empty() || (indicator_cvt).size() == 0) {
-      n_cvt = 1;
-    }
-  }
-
   if ((a_mode == 66 || a_mode == 67) && (v_pve.size() != n_vc)) {
     cout << "error! the number of pve estimates does not equal to "
          << "the number of categories in the cat file:" << v_pve.size() << " "
@@ -1194,21 +1099,21 @@ void PARAM::CheckData(void) {
       cout << "## number of total genes = " << ng_total << endl;
     } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) {
       if (!loco.empty())
-        cout << "## leave one chromosome out (LOCO) = " << loco << endl;
-      cout << "## number of total SNPs    = " << ns_total << endl;
+        cout << "## leave one chromosome out (LOCO) = " << setw(8) << loco << endl;
+      cout << "## number of total SNPs/var        = " << setw(8) << ns_total << endl;
       if (setSnps.size())
-        cout << "## number of considered SNPS = " << setSnps.size() << endl;
+        cout << "## number of considered SNPS       = " << setw(8) << setSnps.size() << endl;
       if (setKSnps.size())
-        cout << "## number of SNPS for K    = " << setKSnps.size() << endl;
+        cout << "## number of SNPS for K            = " << setw(8) << setKSnps.size() << endl;
       if (setGWASnps.size())
-        cout << "## number of SNPS for GWAS = " << setGWASnps.size() << endl;
-      cout << "## number of analyzed SNPs = " << ns_test << endl;
+        cout << "## number of SNPS for GWAS         = " << setw(8) << setGWASnps.size() << endl;
+      cout << "## number of analyzed SNPs         = " << setw(8) << ns_test << endl;
     } else {
     }
   }
 
   // Set d_pace to 1000 for gene expression.
-  if (!file_gene.empty() && d_pace == 100000) {
+  if (!file_gene.empty() && d_pace == DEFAULT_PACE) {
     d_pace = 1000;
   }
 
@@ -1340,7 +1245,7 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
     }
   } else {
     if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
-                      calc_K, mode_debug) == false) {
+                      calc_K) == false) {
       error = true;
     }
   }
@@ -1360,7 +1265,7 @@ void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
     }
   } else {
     if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
-                      ni_test, ns_test, mode_debug) == false) {
+                      ni_test, ns_test) == false) {
       error = true;
     }
   }
@@ -1375,18 +1280,11 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) {
 
   if (!file_bfile.empty()) {
     file_str = file_bfile + ".bed";
-    enforce_msg(loco.empty(), "FIXME: LOCO nyi");
+    // enforce_msg(loco.empty(), "FIXME: LOCO nyi");
     if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
         false) {
       error = true;
     }
-  } else if (!file_oxford.empty()) {
-    file_str = file_oxford + ".bgen";
-    enforce_msg(loco.empty(), "FIXME: LOCO nyi");
-    if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
-        false) {
-      error = true;
-    }
   } else {
     file_str = file_geno;
     if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace,
diff --git a/src/param.h b/src/param.h
index ff279bd..9ad14b2 100644
--- a/src/param.h
+++ b/src/param.h
@@ -1,6 +1,8 @@
 /*
     Genome-wide Efficient Mixed Model Association (GEMMA)
-    Copyright (C) 2011-2017, Xiang Zhou
+    Copyright © 2011-2017, Xiang Zhou
+    Copyright © 2017, Peter Carbonetto
+    Copyright © 2017, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -26,7 +28,8 @@
 #include <set>
 #include <vector>
 
-#define K_BATCH_SIZE 10000 // #snps used for batched K
+#define K_BATCH_SIZE 20000 // #snps used for batched K
+#define DEFAULT_PACE 1000  // for display only
 
 using namespace std;
 
@@ -115,16 +118,16 @@ public:
 class PARAM {
 public:
   // IO-related parameters
-  bool mode_check = true;   // run data checks (slower)
-  bool mode_strict = false; // exit on some data checks
-  bool mode_silence;
-  bool mode_debug = false;
-  uint issue; // enable tests for issue on github tracker
+  // bool mode_check = true;   // run data checks (slower)
+  // bool mode_strict = false; // exit on some data checks
+  // bool mode_silence;
+  // bool mode_debug = false;
+  // uint issue; // enable tests for issue on github tracker
 
   uint a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
   int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
   vector<size_t> p_column; // Which phenotype column needs analysis.
-  size_t d_pace;           // Display pace
+  size_t d_pace = DEFAULT_PACE;   // Display pace (-pace switch)
 
   string file_bfile, file_mbfile;
   string file_geno, file_mgeno;
@@ -155,9 +158,6 @@ public:
   string file_ksnps;   // File SNPs for computing K
   string file_gwasnps; // File SNPs for computing GWAS
 
-  // WJA added.
-  string file_oxford;
-
   // QC-related parameters.
   double miss_level;
   double maf_level;
@@ -368,10 +368,4 @@ public:
 
 size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
 
-// Helpers for checking parameters
-#define enforce_fexists(fn, msg)                                               \
-  if (!fn.empty())                                                             \
-    enforce_msg(stat(fn.c_str(), &fileInfo) == 0,                              \
-                ((std::string(__STRING(fn)) + ": " + msg).c_str()));
-
 #endif
diff --git a/src/prdt.cpp b/src/prdt.cpp
index 9dc84bc..fc0abe8 100644
--- a/src/prdt.cpp
+++ b/src/prdt.cpp
@@ -227,7 +227,7 @@ void PRDT::AnalyzeBimbam(gsl_vector *y_prdt) {
 
   // Start reading genotypes and analyze.
   for (size_t t = 0; t < ns_total; ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == (ns_total - 1)) {
       ProgressBar("Reading SNPs  ", t, ns_total - 1);
     }
diff --git a/src/varcov.cpp b/src/varcov.cpp
index 39c3523..e9c7295 100644
--- a/src/varcov.cpp
+++ b/src/varcov.cpp
@@ -198,7 +198,7 @@ void VARCOV::CalcNB(vector<SNPINFO> &snpInfo_sort) {
            (snpInfo_sort[t2].cM - snpInfo_sort[t].cM < window_cm ||
             window_cm == 0) &&
            (snpInfo_sort[t2].base_position - snpInfo_sort[t].base_position <
-                window_bp ||
+            (long int) window_bp ||
             window_bp == 0) &&
            (n_nb < window_ns || window_ns == 0)) {
       t2++;
diff --git a/src/vc.cpp b/src/vc.cpp
index 1465f16..1a16c07 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -41,7 +41,7 @@
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_multiroots.h"
 
-#include "Eigen/Dense"
+// #include "Eigen/Dense"
 
 #include "eigenlib.h"
 #include "gzstream.h"
@@ -53,7 +53,7 @@
 #include "vc.h"
 
 using namespace std;
-using namespace Eigen;
+// using namespace Eigen;
 
 // In this file, X, Y are already transformed (i.e. UtX and UtY).
 void VC::CopyFromParam(PARAM &cPar) {
@@ -663,7 +663,7 @@ void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
   HEADER header;
 
   // Header.
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_vc(line, header);
 
   if (header.n_col == 0) {
@@ -678,7 +678,7 @@ void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
   while (!safeGetline(infile, line).eof()) {
 
     // do not read cor values this time; upto col_n-1.
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     n_total = 0;
     n_mis = 0;
@@ -688,6 +688,7 @@ void ReadFile_cor(const string &file_cor, const set<string> &setSnps,
     d_cm = 0;
     d_pos = 0;
     for (size_t i = 0; i < header.coln - 1; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -822,7 +823,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
 
   // Read header.
   HEADER header;
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_vc(line, header);
 
   if (header.n_col == 0) {
@@ -844,7 +845,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
   }
 
   while (!safeGetline(infile, line).eof()) {
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
 
     z = 0;
     beta = 0;
@@ -857,6 +858,7 @@ void ReadFile_beta(const bool flag_priorscale, const string &file_beta,
     af = 0;
     var_x = 0;
     for (size_t i = 0; i < header.coln; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -1055,7 +1057,7 @@ void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
   // Header.
   HEADER header;
 
-  !safeGetline(infile, line).eof();
+  safeGetline(infile, line).eof();
   ReadHeader_vc(line, header);
 
   while (!safeGetline(infile, line).eof()) {
@@ -1063,8 +1065,9 @@ void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
     // Do not read cor values this time; upto col_n-1.
     d_pos1 = 0;
     d_cm1 = 0;
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     for (size_t i = 0; i < header.coln - 1; i++) {
+      enforce(ch_ptr);
       if (header.rs_col != 0 && header.rs_col == i + 1) {
         rs = ch_ptr;
       }
@@ -1932,7 +1935,7 @@ void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W,
   size_t n1 = K->size1, n2 = K->size2;
   size_t n_vc = n2 / n1;
 
-  double d, y2_sum, tau_inv, se_tau_inv;
+  double d, y2_sum, tau_inv;
 
   // New matrices/vectors.
   gsl_matrix *K_scale = gsl_matrix_alloc(n1, n2);
@@ -2131,7 +2134,7 @@ void VC::CalcVCacl(const gsl_matrix *K, const gsl_matrix *W,
   // Compute variance for tau_inv.
   gsl_blas_dgemv(CblasNoTrans, 1.0, V_mat, y_scale, 0.0, n1_vec);
   gsl_blas_ddot(y_scale, n1_vec, &d);
-  se_tau_inv = sqrt(2 * d) / (double)n1;
+  // auto se_tau_inv = sqrt(2 * d) / (double)n1;  UNUSED
 
   // Transform pve back to the original scale and save data.
   v_pve.clear();
@@ -2238,7 +2241,7 @@ bool BimbamXwz(const string &file_geno, const int display_pace,
   gsl_vector_mul(wz, w);
 
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
     }
@@ -2246,9 +2249,9 @@ bool BimbamXwz(const string &file_geno, const int display_pace,
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     geno_mean = 0.0;
     n_miss = 0;
@@ -2260,7 +2263,7 @@ bool BimbamXwz(const string &file_geno, const int display_pace,
       if (indicator_idv[i] == 0) {
         continue;
       }
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(geno_miss, i, 0);
         n_miss++;
@@ -2491,7 +2494,7 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace,
   gsl_vector *geno_miss = gsl_vector_alloc(ni_test);
 
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
     }
@@ -2499,9 +2502,9 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace,
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     geno_mean = 0.0;
     n_miss = 0;
@@ -2513,7 +2516,7 @@ bool BimbamXtXwz(const string &file_geno, const int display_pace,
       if (indicator_idv[i] == 0) {
         continue;
       }
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(geno_miss, i, 0);
         n_miss++;
diff --git a/test/dev_test_suite.sh b/test/dev_test_suite.sh
index 0fc4423..0d3d8a0 100755
--- a/test/dev_test_suite.sh
+++ b/test/dev_test_suite.sh
@@ -1,29 +1,31 @@
 #!/usr/bin/env bash
 
 gemma=../bin/gemma
+# gemmaopts="-debug -strict"
+gemmaopts="-debug"
 
 # Related to https://github.com/genetics-statistics/GEMMA/issues/78
 testBXDStandardRelatednessMatrixKSingularError() {
     outn=BXDerr
     rm -f output/$outn.*
-    $gemma -g ../example/BXD_geno.txt.gz \
+    $gemma $gemmaopts \
+           -g ../example/BXD_geno.txt.gz \
            -p ../example/BXD_pheno.txt \
            -c ../example/BXD_covariates.txt \
            -a ../example/BXD_snps.txt \
            -gk \
-           -debug -o $outn
+           -o $outn
     assertEquals 22 $? # should show singular error
 }
 
 testBXDStandardRelatednessMatrixK() {
     outn=BXD
     rm -f output/$outn.*
-    $gemma -g ../example/BXD_geno.txt.gz \
+    $gemma $gemmaopts -g ../example/BXD_geno.txt.gz \
            -p ../example/BXD_pheno.txt \
            -c ../example/BXD_covariates2.txt \
            -a ../example/BXD_snps.txt \
            -gk \
-           -debug \
            -o $outn
     assertEquals 0 $?
     outfn=output/$outn.cXX.txt
@@ -31,28 +33,43 @@ testBXDStandardRelatednessMatrixK() {
     assertEquals "-116.11" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
+testBXDLMLikelihoodRatio() {
+    outn=BXD_LM_LR
+    $gemma $gemmaopts -g ../example/BXD_geno.txt.gz \
+           -p ../example/BXD_pheno.txt \
+           -c ../example/BXD_covariates2.txt \
+           -a ../example/BXD_snps.txt \
+           -k ./output/BXD.cXX.txt \
+           -lm 4 -maf 0.1 \
+           -o $outn
+    assertEquals 0 $?
+
+    outfn=output/$outn.assoc.txt
+    assertEquals "95134" `wc -w < $outfn`
+    assertEquals "3089042886.28" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
 testBXDLMMLikelihoodRatio() {
     outn=BXD_LMM_LR
-    $gemma -g ../example/BXD_geno.txt.gz \
+    $gemma $gemmaopts -g ../example/BXD_geno.txt.gz \
            -p ../example/BXD_pheno.txt \
            -c ../example/BXD_covariates2.txt \
            -a ../example/BXD_snps.txt \
            -k ./output/BXD.cXX.txt \
            -lmm 2 -maf 0.1 \
-           -debug \
            -o $outn
     assertEquals 0 $?
 
     outfn=output/$outn.assoc.txt
-    assertEquals "80498" `wc -w < $outfn`
+    assertEquals "73180" `wc -w < $outfn`
     assertEquals "3088458212.93" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
 testCenteredRelatednessMatrixKLOCO1() {
     outn=mouse_hs1940_LOCO1
     rm -f output/$outn.*
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \
-           -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -debug -o $outn
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz -p ../example/mouse_hs1940.pheno.txt \
+           -a ../example/mouse_hs1940.anno.txt -snps ../example/mouse_hs1940_snps.txt -nind 400 -loco 1 -gk -o $outn
     assertEquals 0 $?
     grep "total computation time" < output/$outn.log.txt
     outfn=output/$outn.cXX.txt
@@ -65,7 +82,7 @@ testCenteredRelatednessMatrixKLOCO1() {
 testUnivariateLinearMixedModelLOCO1() {
     outn=mouse_hs1940_CD8_LOCO1_lmm
     rm -f output/$outn.*
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
            -p ../example/mouse_hs1940.pheno.txt \
 	   -n 1 \
 	   -loco 1 \
@@ -73,7 +90,47 @@ testUnivariateLinearMixedModelLOCO1() {
            -k ./output/mouse_hs1940_LOCO1.cXX.txt \
 	   -snps ../example/mouse_hs1940_snps.txt -lmm \
 	   -nind 400 \
-	   -debug \
+           -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    assertEquals 0 $?
+    outfn=output/$outn.assoc.txt
+    assertEquals "68" `wc -l < $outfn`
+    assertEquals "15465346.22" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testPlinkCenteredRelatednessMatrixKLOCO1() {
+    return 0
+    outn=mouse_hs1940_Plink_LOCO1
+    rm -f output/$outn.*
+    $gemma $gemmaopts -bfile ../example/mouse_hs1940 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -snps ../example/mouse_hs1940_snps.txt \
+           -nind 400 \
+           -loco 1 \
+           -gk \
+           -o $outn
+    assertEquals 0 $?
+    grep "total computation time" < output/$outn.log.txt
+    outfn=output/$outn.cXX.txt
+    assertEquals 0 $?
+    assertEquals "400" `wc -l < $outfn`
+    assertEquals "0.312" `head -c 5 $outfn`
+    assertEquals "71.03" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+
+testPlinkUnivariateLinearMixedModelLOCO1() {
+    return 0
+    outn=mouse_hs1940_CD8_Plink_LOCO1_lmm
+    rm -f output/$outn.*
+    $gemma $gemmaopts -bfile ../example/mouse_hs1940 \
+	   -n 1 \
+	   -loco 1 \
+           -k ./output/mouse_hs1940_Plink_LOCO1.cXX.txt \
+           -a ../example/mouse_hs1940.anno.txt \
+	   -snps ../example/mouse_hs1940_snps.txt -lmm \
+	   -nind 400 \
            -o $outn
     assertEquals 0 $?
     grep "total computation time" < output/$outn.log.txt
diff --git a/test/src/unittests-math.cpp b/test/src/unittests-math.cpp
index ac4c180..757c2dc 100644
--- a/test/src/unittests-math.cpp
+++ b/test/src/unittests-math.cpp
@@ -1,14 +1,23 @@
 #include <catch.hpp>
 #include <iostream>
 #include "gsl/gsl_matrix.h"
-#include "mathfunc.h"
+#include <cblas.h>
+
 #include <algorithm>
 #include <limits>
 #include <numeric>
 
+#include "debug.h"
+#include "mathfunc.h"
+#include "fastblas.h"
+#include "fastopenblas.h"
+
 using namespace std;
 
 TEST_CASE( "Math functions", "[math]" ) {
+  debug_set_debug_mode(true);
+  debug_set_no_check_mode(false);
+  debug_set_strict_mode(true);
   double data[] = { 2,-1, 0,
                    -1, 2,-1,
                     0,-1, 2};
@@ -51,3 +60,109 @@ TEST_CASE( "Math functions", "[math]" ) {
   REQUIRE (std::isnan(v3[2]));
   REQUIRE(has_nan(v3));
 }
+
+TEST_CASE("cblas_dgemm", "[math]") {
+   double *A, *B, *C;
+   int m, n, k, i, j;
+   double alpha, beta;
+
+   printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
+           " Intel(R) MKL function dgemm, where A, B, and  C are matrices and \n"
+           " alpha and beta are double precision scalars\n\n");
+
+   m = 2000, k = 200, n = 1000;
+   printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
+           " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
+   alpha = 1.0; beta = 0.0;
+
+   printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
+           " performance \n\n");
+   A = (double *)malloc( m*k*sizeof( double ));
+   B = (double *)malloc( k*n*sizeof( double ));
+   C = (double *)malloc( m*n*sizeof( double ));
+
+   printf (" Intializing matrix data \n\n");
+   for (i = 0; i < (m*k); i++) {
+     A[i] = (double)(i+1);
+   }
+
+   for (i = 0; i < (k*n); i++) {
+     B[i] = (double)(-i-1);
+   }
+
+   for (i = 0; i < (m*n); i++) {
+     C[i] = 0.0;
+   }
+
+   printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
+   assert(m==2000);
+   assert(k==200);
+   assert(n==1000);
+   //cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+   //            m, n, k, alpha, A, k, B, n, beta, C, n);
+   fast_cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+                    m, n, k, alpha, A, k, B, n, beta, C, n);
+
+   REQUIRE(trunc(C[0]) == -2666620100.0 );
+   REQUIRE(trunc(C[1]) == -2666640200.0 );
+   REQUIRE(trunc(C[2003]) == -10627000400.0 );
+
+}
+
+TEST_CASE("fast_dgemm", "[math]") {
+   double *A, *B, *C;
+   int m, n, k, i, j;
+   double alpha, beta;
+
+   printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
+           " Intel(R) MKL function dgemm, where A, B, and  C are matrices and \n"
+           " alpha and beta are double precision scalars\n\n");
+
+   m = 2000, k = 200, n = 1000;
+   printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
+           " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
+   alpha = 1.0; beta = 0.0;
+
+   printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
+           " performance \n\n");
+   A = (double *)malloc( m*k*sizeof( double ));
+   B = (double *)malloc( k*n*sizeof( double ));
+   C = (double *)malloc( m*n*sizeof( double ));
+
+   printf (" Intializing matrix data \n\n");
+   for (i = 0; i < (m*k); i++) {
+     A[i] = (double)(i+1);
+   }
+
+   for (i = 0; i < (k*n); i++) {
+     B[i] = (double)(-i-1);
+   }
+
+   for (i = 0; i < (m*n); i++) {
+     C[i] = 0.0;
+   }
+
+   printf (" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
+   // cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
+   //            m, n, k, alpha, A, k, B, n, beta, C, n);
+   // eigenlib_dgemm(const char *TransA, const char *TransB, const double alpha,
+   //                const gsl_matrix *A, const gsl_matrix *B, const double beta,
+   //                gsl_matrix *C) {
+   gsl_matrix *AM = gsl_matrix_safe_alloc(m,k); // rows x cols
+   gsl_matrix *BM = gsl_matrix_safe_alloc(k,n);
+   gsl_matrix *CM = gsl_matrix_calloc(m,n);
+
+   fast_copy(AM,A);
+   fast_copy(BM,B);
+   fast_copy(CM,C);
+   fast_dgemm("N","N",alpha,AM,BM,beta,CM);
+   printf ("\n Computations completed.\n\n");
+   A = AM->data;
+   B = BM->data;
+   C = CM->data;
+
+   REQUIRE(trunc(C[0]) == -2666620100.0 );
+   REQUIRE(trunc(C[1]) == -2666640200.0 );
+   REQUIRE(trunc(C[2003]) == -10627000400.0 );
+
+}
diff --git a/test/test_suite.sh b/test/test_suite.sh
index 350fc27..7af33aa 100755
--- a/test/test_suite.sh
+++ b/test/test_suite.sh
@@ -1,13 +1,89 @@
 #!/usr/bin/env bash
 
 gemma=../bin/gemma
+gemmaopts="-debug"
+
+testBslmm1() {
+    outn=mouse_hs1940_CD8_bslmm
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 2 -a ../example/mouse_hs1940.anno.txt \
+           -bslmm \
+           -o $outn -w 1000 -s 10000 -seed 1
+    assertEquals 0 $?
+    outfn1=output/$outn.hyp.txt
+    outfn2=output/$outn.param.txt
+    # assertEquals "45181" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.0f",(substr($x,,0,6))) } END { printf "%.0f",$sum }' $outfn1`
+    # assertEquals "4043967139.42" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn2`
+}
+
+testBslmm2() {
+    outn=mouse_hs1940_CD8_train
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 2 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -gk 1 -o $outn
+    assertEquals 0 $?
+    outfn=output/$outn.cXX.txt
+    assertEquals "579.66" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testBslmm3() {
+    ## Fit a binary trait using a linear model
+    outn=mouse_hs1940_CD8_bslmm_cc1
+    $gemma $gemmaopts \
+           -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 4 \
+           -a ../example/mouse_hs1940.anno.txt \
+           -bslmm \
+           -o $outn \
+           -w 1000 -s 10000 -seed 1
+    assertEquals 0 $?
+    outfn=output/$outn.hyp.txt
+    # assertEquals "291" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.0f",(substr($x,,0,6))) } END { printf "%.0f",100*$sum }' $outfn`
+}
+
+testBslmm4() {
+    outn=mouse_hs1940_CD8_prdt_k
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 2 \
+           -epm ./output/mouse_hs1940_CD8_bslmm.param.txt \
+           -emu ./output/mouse_hs1940_CD8_bslmm.log.txt \
+           -ebv ./output/mouse_hs1940_CD8_bslmm.bv.txt \
+           -k ./output/mouse_hs1940_CD8_train.cXX.txt \
+           -predict \
+           -o $outn
+    assertEquals 0 $?
+    outfn=output/$outn.prdt.txt
+    # assertEquals "-60.33" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
+
+testBslmm5() {
+    ## Now, do prediction in the test set for the binary traits
+    ## If the traits were fitted using the linear model, then:
+    outn=mouse_hs1940_CD8_prdt_cc1
+    $gemma $gemmaopts \
+           -g ../example/mouse_hs1940.geno.txt.gz \
+           -p ../example/mouse_hs1940.pheno.txt \
+           -n 4 \
+           -epm ./output/mouse_hs1940_CD8_bslmm_cc1.param.txt \
+           -emu ./output/mouse_hs1940_CD8_bslmm_cc1.log.txt \
+           -predict \
+           -o $outn
+    assertEquals 0 $?
+    outfn=output/$outn.prdt.txt
+    assertEquals "550.67" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
+}
 
 testCenteredRelatednessMatrixKFullLOCO1() {
     outn=mouse_hs1940_full_LOCO1
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
            -p ../example/mouse_hs1940.pheno.txt \
            -a ../example/mouse_hs1940.anno.txt \
-           -loco 1 -gk -debug -o $outn
+           -loco 1 -gk -o $outn
     assertEquals 0 $?
     outfn=output/$outn.cXX.txt
     assertEquals "1940" `wc -l < $outfn`
@@ -16,14 +92,13 @@ testCenteredRelatednessMatrixKFullLOCO1() {
 
 testUnivariateLinearMixedModelFullLOCO1() {
     outn=mouse_hs1940_CD8_full_LOCO1_lmm
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
            -p ../example/mouse_hs1940.pheno.txt \
 	   -n 1 \
 	   -loco 1 \
            -a ../example/mouse_hs1940.anno.txt \
            -k ./output/mouse_hs1940_full_LOCO1.cXX.txt \
 	   -lmm \
-	   -debug \
            -o $outn
     assertEquals 0 $?
     grep "total computation time" < output/$outn.log.txt
@@ -34,9 +109,9 @@ testUnivariateLinearMixedModelFullLOCO1() {
 }
 
 testCenteredRelatednessMatrixK() {
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
            -p ../example/mouse_hs1940.pheno.txt \
-           -gk -o mouse_hs1940 -debug
+           -gk -o mouse_hs1940
     assertEquals 0 $?
     outfn=output/mouse_hs1940.cXX.txt
     assertEquals "1940" `wc -l < $outfn`
@@ -46,14 +121,13 @@ testCenteredRelatednessMatrixK() {
 }
 
 testUnivariateLinearMixedModel() {
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
            -p ../example/mouse_hs1940.pheno.txt \
            -n 1 \
            -a ../example/mouse_hs1940.anno.txt \
            -k ./output/mouse_hs1940.cXX.txt \
            -lmm \
-           -o mouse_hs1940_CD8_lmm \
-           -debug
+           -o mouse_hs1940_CD8_lmm
     assertEquals 0 $?
     grep "total computation time" < output/mouse_hs1940_CD8_lmm.log.txt
     assertEquals 0 $?
@@ -62,14 +136,13 @@ testUnivariateLinearMixedModel() {
     assertEquals "4038540440.86" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
 }
 
-testMultivariateLinearMixedModel() {
-    $gemma -g ../example/mouse_hs1940.geno.txt.gz \
+testLinearMixedModelPhenotypes() {
+    $gemma $gemmaopts -g ../example/mouse_hs1940.geno.txt.gz \
            -p ../example/mouse_hs1940.pheno.txt \
            -n 1 6 \
            -a ../example/mouse_hs1940.anno.txt \
            -k ./output/mouse_hs1940.cXX.txt \
-           -lmm -o mouse_hs1940_CD8MCH_lmm \
-           -debug
+           -lmm -o mouse_hs1940_CD8MCH_lmm
     assertEquals 0 $?
 
     outfn=output/mouse_hs1940_CD8MCH_lmm.assoc.txt
@@ -82,9 +155,8 @@ testPlinkStandardRelatednessMatrixK() {
     datadir=../example
     outfn=output/$testname.sXX.txt
     rm -f $outfn
-    $gemma -bfile $datadir/HLC \
-           -gk 2 -o $testname \
-           -debug
+    $gemma $gemmaopts -bfile $datadir/HLC \
+           -gk 2 -o $testname
     assertEquals 0 $?
     assertEquals "427" `wc -l < $outfn`
     assertEquals "-358.07" `perl -nle 'foreach $x (split(/\s+/,$_)) { $sum += sprintf("%.2f",(substr($x,,0,6))) } END { printf "%.2f",$sum }' $outfn`
@@ -92,15 +164,14 @@ testPlinkStandardRelatednessMatrixK() {
 
 # Test for https://github.com/genetics-statistics/GEMMA/issues/58
 # fixed GSLv2 NaN's that appeared with covariates.
-testPlinkMultivariateLinearMixedModel() {
-    testname=testPlinkMultivariateLinearMixedModel
+testPlinkLinearMixedModelCovariates() {
+    testname=testPlinkLinearMixedModelCovariates
     datadir=../example
-    $gemma -bfile $datadir/HLC \
+    $gemma $gemmaopts -bfile $datadir/HLC \
            -k output/testPlinkStandardRelatednessMatrixK.sXX.txt \
            -lmm 1 \
            -maf 0.1 \
            -c $datadir/HLC_covariates.txt \
-           -debug \
            -o $testname
     assertEquals 0 $?
     outfn=output/$testname.assoc.txt