1 files changed, 243 insertions, 584 deletions
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 134fbf9..ae8b747 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -39,8 +39,10 @@
 #include "gsl/gsl_vector.h"
 
 #include "eigenlib.h"
+
 #include "gzstream.h"
 #include "io.h"
+#include "fastblas.h"
 #include "lapack.h"
 #include "lmm.h"
 
@@ -56,9 +58,6 @@ void LMM::CopyFromParam(PARAM &cPar) {
   path_out = cPar.path_out;
   file_gene = cPar.file_gene;
 
-  // WJA added.
-  file_oxford = cPar.file_oxford;
-
   l_min = cPar.l_min;
   l_max = cPar.l_max;
   n_region = cPar.n_region;
@@ -107,10 +106,10 @@ void LMM::WriteFiles() {
   }
 
   auto common_header = [&] () {
-    if (a_mode != 2)
+    if (a_mode != 2) {
       outfile << "beta" << "\t";
-
-    outfile << "se" << "\t";
+      outfile << "se" << "\t";
+    }
 
     outfile << "logl_H1" << "\t";  // we may make this an option
 
@@ -139,10 +138,10 @@ void LMM::WriteFiles() {
   auto sumstats = [&] (SUMSTAT st) {
     outfile << scientific << setprecision(6);
 
-    if (a_mode != 2)
+    if (a_mode != 2) {
       outfile << st.beta << "\t";
-
-    outfile << st.se << "\t";
+      outfile << st.se << "\t";
+    }
 
     outfile << st.logl_H1 << "\t";
 
@@ -364,9 +363,9 @@ double LogL_f(double l, void *params) {
   double f = 0.0, logdet_h = 0.0, d;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -414,11 +413,11 @@ double LogL_dev1(double l, void *params) {
   double dev1 = 0.0, trace_Hi = 0.0;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -477,13 +476,13 @@ double LogL_dev2(double l, void *params) {
   double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -554,13 +553,13 @@ void LogL_dev12(double l, void *params, double *dev1, double *dev2) {
   double trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_yy;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -637,10 +636,10 @@ double LogRL_f(double l, void *params) {
   double f = 0.0, logdet_h = 0.0, logdet_hiw = 0.0, d;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *Iab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *Iab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -702,11 +701,11 @@ double LogRL_dev1(double l, void *params) {
   double dev1 = 0.0, trace_Hi = 0.0;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -778,13 +777,13 @@ double LogRL_dev2(double l, void *params) {
   double dev2 = 0.0, trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -868,13 +867,13 @@ void LogRL_dev12(double l, void *params, double *dev1, double *dev2) {
   double trace_Hi = 0.0, trace_HiHi = 0.0;
   size_t index_ww;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_matrix *PPPab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *HiHiHi_eval = gsl_vector_alloc((p->eval)->size);
-  gsl_vector *v_temp = gsl_vector_alloc((p->eval)->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_matrix *PPPab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *HiHiHi_eval = gsl_vector_safe_alloc((p->eval)->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc((p->eval)->size);
 
   gsl_vector_memcpy(v_temp, p->eval);
   gsl_vector_scale(v_temp, l);
@@ -948,9 +947,9 @@ void LMM::CalcRLWald(const double &l, const FUNC_PARAM &params, double &beta,
 
   int df = (int)ni_test - (int)n_cvt - 1;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
-  gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
 
   gsl_vector_memcpy(v_temp, params.eval);
   gsl_vector_scale(v_temp, l);
@@ -990,9 +989,9 @@ void LMM::CalcRLScore(const double &l, const FUNC_PARAM &params, double &beta,
 
   int df = (int)ni_test - (int)n_cvt - 1;
 
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc(params.eval->size);
-  gsl_vector *v_temp = gsl_vector_alloc(params.eval->size);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc(params.eval->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc(params.eval->size);
 
   gsl_vector_memcpy(v_temp, params.eval);
   gsl_vector_scale(v_temp, l);
@@ -1031,7 +1030,7 @@ void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
   size_t index_ab;
   size_t n_cvt = UtW->size2;
 
-  gsl_vector *u_a = gsl_vector_alloc(Uty->size);
+  gsl_vector *u_a = gsl_vector_safe_alloc(Uty->size);
 
   for (size_t a = 1; a <= n_cvt + 2; ++a) {
     if (a == n_cvt + 1) {
@@ -1097,8 +1096,8 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, gsl_vector *ab) {
   size_t n_cvt = W->size2;
 
   double d;
-  gsl_vector *v_a = gsl_vector_alloc(y->size);
-  gsl_vector *v_b = gsl_vector_alloc(y->size);
+  gsl_vector *v_a = gsl_vector_safe_alloc(y->size);
+  gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
 
   for (size_t a = 1; a <= n_cvt + 2; ++a) {
     if (a == n_cvt + 1) {
@@ -1142,7 +1141,7 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
   size_t n_cvt = W->size2;
 
   double d;
-  gsl_vector *v_b = gsl_vector_alloc(y->size);
+  gsl_vector *v_b = gsl_vector_safe_alloc(y->size);
 
   for (size_t b = 1; b <= n_cvt + 2; ++b) {
     index_ab = GetabIndex(n_cvt + 1, b, n_cvt);
@@ -1167,6 +1166,7 @@ void Calcab(const gsl_matrix *W, const gsl_vector *y, const gsl_vector *x,
 void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
                       const gsl_matrix *UtW, const gsl_vector *Utx,
                       const gsl_matrix *W, const gsl_vector *x) {
+  debug_msg(file_gene);
   igzstream infile(file_gene.c_str(), igzstream::in);
   if (!infile) {
     cout << "error reading gene expression file:" << file_gene << endl;
@@ -1188,25 +1188,25 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_vector *y = gsl_vector_alloc(U->size1);
-  gsl_vector *Uty = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_vector *y = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *Uty = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   // Header.
   getline(infile, line);
 
   for (size_t t = 0; t < ng_total; t++) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == ng_total - 1) {
-      ProgressBar("Performing Analysis ", t, ng_total - 1);
+      ProgressBar("Performing Analysis", t, ng_total - 1);
     }
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
     rs = ch_ptr;
 
     c_phen = 0;
     for (size_t i = 0; i < indicator_idv.size(); ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -1271,35 +1271,37 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
   return;
 }
 
-void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
-                        const gsl_matrix *UtW, const gsl_vector *Uty,
-                        const gsl_matrix *W, const gsl_vector *y,
-                        const set<string> gwasnps) {
-  debug_msg("entering");
+
+void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
+                  const gsl_matrix *U, const gsl_vector *eval,
+                  const gsl_matrix *UtW, const gsl_vector *Uty,
+                  const gsl_matrix *W, const gsl_vector *y,
+                  const set<string> gwasnps) {
   clock_t time_start = clock();
 
-  // LOCO support
+  // Subset/LOCO support
   bool process_gwasnps = gwasnps.size();
   if (process_gwasnps)
-    debug_msg("AnalyzeBimbam w. LOCO");
+    debug_msg("Analyze subset of SNPs (LOCO)");
 
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
   const size_t inds = U->size1;
-  gsl_vector *x = gsl_vector_alloc(inds); // #inds
-  gsl_vector *x_miss = gsl_vector_alloc(inds);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  enforce(inds == ni_test);
+  gsl_vector *x = gsl_vector_safe_alloc(inds); // #inds
+  gsl_vector *x_miss = gsl_vector_safe_alloc(inds);
+  gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   // Create a large matrix with LMM_BATCH_SIZE columns for batched processing
   // const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE);
   const size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(inds, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(inds, msize);
-
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(inds, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_safe_alloc(inds, msize);
   enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure
+  enforce(Xlarge->size1 == inds);
   gsl_matrix_set_zero(Xlarge);
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -1307,9 +1309,6 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
   // start reading genotypes and analyze
   size_t c = 0;
 
-  igzstream infile(file_geno.c_str(), igzstream::in);
-  enforce_msg(infile, "error reading genotype file");
-
   auto batch_compute = [&](size_t l) { // using a C++ closure
     // Compute SNPs in batch, note the computations are independent per SNP
     gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
@@ -1317,7 +1316,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
         gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
 
     time_start = clock();
-    eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+    fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
                    &UtXlarge_sub.matrix);
     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
@@ -1332,8 +1331,8 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
       time_start = clock();
       FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
 
-      double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-      double p_lrt = 0, p_score = 0;
+      double lambda_mle = 0.0, lambda_remle = 0.0, beta = 0.0, se = 0.0, p_wald = 0.0;
+      double p_lrt = 0.0, p_score = 0.0;
       double logl_H1 = 0.0;
 
       // 3 is before 1.
@@ -1361,54 +1360,69 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
     }
   };
 
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    // for every SNP
-    string line;
-    safeGetline(infile, line);
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+  const auto num_snps = indicator_snp.size();
+  const size_t progress_step = (num_snps/50>d_pace ? num_snps/50 : d_pace);
+
+  for (size_t t = 0; t < num_snps; ++t) {
+    if (t % progress_step == 0 || t == (num_snps - 1)) {
+      ProgressBar("Reading SNPs", t, num_snps - 1);
     }
     if (indicator_snp[t] == 0)
       continue;
 
-    char *ch_ptr = strtok((char *)line.c_str(), " , \t");
-    auto snp = string(ch_ptr);
+    auto tup = fetch_snp(t);
+    auto snp = get<0>(tup);
+    auto gs = get<1>(tup);
+
     // check whether SNP is included in gwasnps (used by LOCO)
     if (process_gwasnps && gwasnps.count(snp) == 0)
       continue;
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
 
-    double x_mean = 0.0;
-    int c_phen = 0;
-    int n_miss = 0;
+    // drop missing idv and plug mean values for missing geno
+    double x_total = 0.0; // sum genotype values to compute x_mean
+    uint pos = 0;         // position in target vector
+    uint n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
       // get the genotypes per individual and compute stats per SNP
-      ch_ptr = strtok(NULL, " , \t");
-      if (indicator_idv[i] == 0)
+      if (indicator_idv[i] == 0) // skip individual
         continue;
 
-      if (strcmp(ch_ptr, "NA") == 0) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
+      double geno = gs[i];
+      if (std::isnan(geno)) {
+        gsl_vector_set(x_miss, pos, 1.0);
         n_miss++;
       } else {
-        double geno = atof(ch_ptr);
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
+        gsl_vector_set(x, pos, geno);
+        x_total += geno;
       }
-      c_phen++;
+      pos++;
     }
+    enforce(pos == ni_test);
 
-    x_mean /= (double)(ni_test - n_miss);
+    const double x_mean = x_total/(double)(ni_test - n_miss);
 
+    // plug x_mean back into missing values
     for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
+      if (gsl_vector_get(x_miss, i) == 1.0) {
         gsl_vector_set(x, i, x_mean);
       }
     }
+
+    /* this is what below GxE does
+    for (size_t i = 0; i < ni_test; ++i) {
+      auto geno = gsl_vector_get(x, i);
+      if (std::isnan(geno)) {
+        gsl_vector_set(x, i, x_mean);
+        geno = x_mean;
+      }
+      if (x_mean > 1.0) {
+        gsl_vector_set(x, i, 2 - geno);
+      }
+    }
+    */
+    enforce(x->size == ni_test);
+
     // copy genotype values for SNP into Xlarge cache
     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
     gsl_vector_memcpy(&Xlarge_col.vector, x);
@@ -1418,6 +1432,7 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
       batch_compute(msize);
   }
   batch_compute(c % msize);
+  ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1);
   // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
   cout << endl;
 
@@ -1430,114 +1445,111 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
   gsl_matrix_free(Xlarge);
   gsl_matrix_free(UtXlarge);
 
-  infile.close();
-  infile.clear();
-
-  return;
 }
 
-void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
-                       const gsl_matrix *UtW, const gsl_vector *Uty,
-                       const gsl_matrix *W, const gsl_vector *y) {
-  debug_msg("entering");
-  string file_bed = file_bfile + ".bed";
-  ifstream infile(file_bed.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bed file:" << file_bed << endl;
-    return;
-  }
+void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
+                        const gsl_matrix *UtW, const gsl_vector *Uty,
+                        const gsl_matrix *W, const gsl_vector *y,
+                        const set<string> gwasnps) {
+  debug_msg(file_geno);
 
-  clock_t time_start = clock();
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  enforce_msg(infile, "error reading genotype file");
+  size_t prev_line = 0;
 
-  char ch[1];
-  bitset<8> b;
+  std::vector <double> gs;
+  gs.resize(ni_total);
 
-  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-  double p_lrt = 0, p_score = 0;
-  double logl_H1 = 0.0;
-  int n_bit, n_miss, ci_total, ci_test;
-  double geno, x_mean;
+  // fetch_snp is a callback function for every SNP row
+  std::function<SnpNameValues(size_t)>  fetch_snp = [&](size_t num) {
+    string line;
+    while (prev_line <= num) {
+      // also read SNPs that were skipped
+      safeGetline(infile, line);
+      prev_line++;
+    }
+    char *ch_ptr = strtok((char *)line.c_str(), " , \t");
+    enforce_msg(ch_ptr, "Parsing BIMBAM genofile"); // ch_ptr should not be NULL
 
-  // Calculate basic quantities.
-  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+    auto snp = string(ch_ptr);
+    ch_ptr = strtok_safe(NULL, " , \t"); // skip column
+    ch_ptr = strtok_safe(NULL, " , \t"); // skip column
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+    gs.assign (ni_total,nan("")); // wipe values
 
-  // Create a large matrix.
-  size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
+    for (size_t i = 0; i < ni_total; ++i) {
+      ch_ptr = strtok(NULL, " , \t");
+      enforce_msg(ch_ptr,line.c_str());
+      if (strcmp(ch_ptr, "NA") != 0)
+        gs[i] = atof(ch_ptr);
+    }
+    return std::make_tuple(snp,gs);
+  };
 
-  gsl_matrix_set_zero(Uab);
-  CalcUab(UtW, Uty, Uab);
+  LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
 
+  infile.close();
+  infile.clear();
+}
+
+void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
+                       const gsl_matrix *UtW, const gsl_vector *Uty,
+                       const gsl_matrix *W, const gsl_vector *y,
+                       const set<string> gwasnps) {
+  string file_bed = file_bfile + ".bed";
+  debug_msg(file_bed);
+
+  ifstream infile(file_bed.c_str(), ios::binary);
+  enforce_msg(infile,"error reading genotype (.bed) file");
+
+  bitset<8> bset8;
+  char ch[1]; // buffer
   // Calculate n_bit and c, the number of bit for each SNP.
-  if (ni_total % 4 == 0) {
-    n_bit = ni_total / 4;
-  } else {
-    n_bit = ni_total / 4 + 1;
-  }
+  const size_t n_bit = (ni_total % 4 == 0 ? ni_total / 4 : ni_total / 4 + 1);
 
-  // Print the first three magic numbers.
+  // first three magic numbers.
   for (int i = 0; i < 3; ++i) {
     infile.read(ch, 1);
-    b = ch[0];
+    const bitset<8> b = ch[0];
   }
 
-  size_t c = 0, t_last = 0;
-  for (size_t t = 0; t < snpInfo.size(); ++t) {
-    if (indicator_snp[t] == 0)
-      continue;
-    t_last++;
-  }
-  for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
-    if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
-    }
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
+  std::vector <double> gs;
+  gs.resize(ni_total);
 
+  // fetch_snp is a callback function for every SNP row
+  std::function<SnpNameValues(size_t)>  fetch_snp = [&](size_t num) {
+    gs.assign (ni_total,nan("")); // wipe values
     // n_bit, and 3 is the number of magic numbers.
+    auto t = num;
     infile.seekg(t * n_bit + 3);
-
-    // Read genotypes.
-    x_mean = 0.0;
-    n_miss = 0;
-    ci_total = 0;
-    ci_test = 0;
+    auto ci_total = 0;
+    auto ci_test = 0;
+    // ---- for all genotypes
     for (int i = 0; i < n_bit; ++i) {
       infile.read(ch, 1);
-      b = ch[0];
+      bset8 = ch[0];
 
       // Minor allele homozygous: 2.0; major: 0.0.
       for (size_t j = 0; j < 4; ++j) {
         if ((i == (n_bit - 1)) && ci_total == (int)ni_total) {
           break;
         }
-        if (indicator_idv[ci_total] == 0) {
+        if (indicator_idv[ci_total] == 0) { // skip individual
           ci_total++;
           continue;
         }
 
-        if (b[2 * j] == 0) {
-          if (b[2 * j + 1] == 0) {
-            gsl_vector_set(x, ci_test, 2);
-            x_mean += 2.0;
+        if (bset8[2 * j] == 0) {
+          if (bset8[2 * j + 1] == 0) {
+            gs[ci_test] = 2.0;
           } else {
-            gsl_vector_set(x, ci_test, 1);
-            x_mean += 1.0;
+            gs[ci_test] = 1.0;
           }
         } else {
-          if (b[2 * j + 1] == 1) {
-            gsl_vector_set(x, ci_test, 0);
+          if (bset8[2 * j + 1] == 1) {
+            gs[ci_test] = 0.0;
           } else {
-            gsl_vector_set(x, ci_test, -9);
-            n_miss++;
+            gs[ci_test] = nan(""); // already set to NaN - originally was -9.0
           }
         }
 
@@ -1545,367 +1557,14 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
         ci_test++;
       }
     }
+    string snp="unknown";
+    return std::make_tuple(snp,gs);
+  };
 
-    x_mean /= (double)(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      geno = gsl_vector_get(x, i);
-      if (geno == -9) {
-        gsl_vector_set(x, i, x_mean);
-        geno = x_mean;
-      }
-    }
-
-    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
-    c++;
-
-    if (c % msize == 0 || c == t_last) {
-      size_t l = 0;
-      if (c % msize == 0) {
-        l = msize;
-      } else {
-        l = c % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
-
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
-        CalcUab(UtW, Uty, Utx, Uab);
-
-        time_start = clock();
-        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-        // 3 is before 1, for beta.
-        if (a_mode == 3 || a_mode == 4) {
-          CalcRLScore(l_mle_null, param1, beta, se, p_score);
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
-                     logl_H1);
-          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score, logl_H1};
-        sumStat.push_back(SNPs);
-      }
-    }
-  }
-  cout << endl;
-
-  gsl_vector_free(x);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
-
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
-
-  infile.close();
-  infile.clear();
-
-  return;
-}
-
-// WJA added.
-void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
-                      const gsl_matrix *UtW, const gsl_vector *Uty,
-                      const gsl_matrix *W, const gsl_vector *y) {
-  debug_msg("entering");
-  string file_bgen = file_oxford + ".bgen";
-  ifstream infile(file_bgen.c_str(), ios::binary);
-  if (!infile) {
-    cout << "error reading bgen file:" << file_bgen << endl;
-    return;
-  }
-
-  clock_t time_start = clock();
-  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-  double p_lrt = 0, p_score = 0;
-  double logl_H1 = 0.0;
-  int n_miss, c_phen;
-  double geno, x_mean;
-
-  // Calculate basic quantities.
-  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
-
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
-
-  // Create a large matrix.
-  size_t msize = LMM_BATCH_SIZE;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
-
-  gsl_matrix_set_zero(Uab);
-  CalcUab(UtW, Uty, Uab);
-
-  // Read in header.
-  uint32_t bgen_snp_block_offset;
-  uint32_t bgen_header_length;
-  uint32_t bgen_nsamples;
-  uint32_t bgen_nsnps;
-  uint32_t bgen_flags;
-  infile.read(reinterpret_cast<char *>(&bgen_snp_block_offset), 4);
-  infile.read(reinterpret_cast<char *>(&bgen_header_length), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsnps), 4);
-  bgen_snp_block_offset -= 4;
-  infile.read(reinterpret_cast<char *>(&bgen_nsamples), 4);
-  bgen_snp_block_offset -= 4;
-  infile.ignore(4 + bgen_header_length - 20);
-  bgen_snp_block_offset -= 4 + bgen_header_length - 20;
-  infile.read(reinterpret_cast<char *>(&bgen_flags), 4);
-  bgen_snp_block_offset -= 4;
-  bool CompressedSNPBlocks = bgen_flags & 0x1;
-
-  infile.ignore(bgen_snp_block_offset);
-
-  double bgen_geno_prob_AA, bgen_geno_prob_AB, bgen_geno_prob_BB;
-  double bgen_geno_prob_non_miss;
-
-  uint32_t bgen_N;
-  uint16_t bgen_LS;
-  uint16_t bgen_LR;
-  uint16_t bgen_LC;
-  uint32_t bgen_SNP_pos;
-  uint32_t bgen_LA;
-  std::string bgen_A_allele;
-  uint32_t bgen_LB;
-  std::string bgen_B_allele;
-  uint32_t bgen_P;
-  size_t unzipped_data_size;
-  string id;
-  string rs;
-  string chr;
-  std::cout << "Warning: WJA hard coded SNP missingness "
-            << "threshold of 10%" << std::endl;
-
-  // Start reading genotypes and analyze.
-  size_t c = 0, t_last = 0;
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-    t_last++;
-  }
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
-    }
-    if (indicator_snp[t] == 0) {
-      continue;
-    }
-
-    // Read SNP header.
-    id.clear();
-    rs.clear();
-    chr.clear();
-    bgen_A_allele.clear();
-    bgen_B_allele.clear();
-
-    infile.read(reinterpret_cast<char *>(&bgen_N), 4);
-    infile.read(reinterpret_cast<char *>(&bgen_LS), 2);
-
-    id.resize(bgen_LS);
-    infile.read(&id[0], bgen_LS);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LR), 2);
-    rs.resize(bgen_LR);
-    infile.read(&rs[0], bgen_LR);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LC), 2);
-    chr.resize(bgen_LC);
-    infile.read(&chr[0], bgen_LC);
-
-    infile.read(reinterpret_cast<char *>(&bgen_SNP_pos), 4);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LA), 4);
-    bgen_A_allele.resize(bgen_LA);
-    infile.read(&bgen_A_allele[0], bgen_LA);
-
-    infile.read(reinterpret_cast<char *>(&bgen_LB), 4);
-    bgen_B_allele.resize(bgen_LB);
-    infile.read(&bgen_B_allele[0], bgen_LB);
-
-    uint16_t unzipped_data[3 * bgen_N];
-
-    if (indicator_snp[t] == 0) {
-      if (CompressedSNPBlocks)
-        infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      else
-        bgen_P = 6 * bgen_N;
-
-      infile.ignore(static_cast<size_t>(bgen_P));
-
-      continue;
-    }
-
-    if (CompressedSNPBlocks) {
-      infile.read(reinterpret_cast<char *>(&bgen_P), 4);
-      uint8_t zipped_data[bgen_P];
-
-      unzipped_data_size = 6 * bgen_N;
-
-      infile.read(reinterpret_cast<char *>(zipped_data), bgen_P);
-
-      int result = uncompress(reinterpret_cast<Bytef *>(unzipped_data),
-                              reinterpret_cast<uLongf *>(&unzipped_data_size),
-                              reinterpret_cast<Bytef *>(zipped_data),
-                              static_cast<uLong>(bgen_P));
-      assert(result == Z_OK);
-
-    } else {
-
-      bgen_P = 6 * bgen_N;
-      infile.read(reinterpret_cast<char *>(unzipped_data), bgen_P);
-    }
-
-    x_mean = 0.0;
-    c_phen = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(x_miss);
-    for (size_t i = 0; i < bgen_N; ++i) {
-      if (indicator_idv[i] == 0) {
-        continue;
-      }
-
-      bgen_geno_prob_AA = static_cast<double>(unzipped_data[i * 3]) / 32768.0;
-      bgen_geno_prob_AB =
-          static_cast<double>(unzipped_data[i * 3 + 1]) / 32768.0;
-      bgen_geno_prob_BB =
-          static_cast<double>(unzipped_data[i * 3 + 2]) / 32768.0;
-
-      // WJA.
-      bgen_geno_prob_non_miss =
-          bgen_geno_prob_AA + bgen_geno_prob_AB + bgen_geno_prob_BB;
-      if (bgen_geno_prob_non_miss < 0.9) {
-        gsl_vector_set(x_miss, c_phen, 0.0);
-        n_miss++;
-      } else {
-
-        bgen_geno_prob_AA /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_AB /= bgen_geno_prob_non_miss;
-        bgen_geno_prob_BB /= bgen_geno_prob_non_miss;
-
-        geno = 2.0 * bgen_geno_prob_BB + bgen_geno_prob_AB;
-
-        gsl_vector_set(x, c_phen, geno);
-        gsl_vector_set(x_miss, c_phen, 1.0);
-        x_mean += geno;
-      }
-      c_phen++;
-    }
-
-    x_mean /= static_cast<double>(ni_test - n_miss);
-
-    for (size_t i = 0; i < ni_test; ++i) {
-      if (gsl_vector_get(x_miss, i) == 0) {
-        gsl_vector_set(x, i, x_mean);
-      }
-      geno = gsl_vector_get(x, i);
-    }
-
-    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, x);
-    c++;
-
-    if (c % msize == 0 || c == t_last) {
-      size_t l = 0;
-      if (c % msize == 0) {
-        l = msize;
-      } else {
-        l = c % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
-
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
-        CalcUab(UtW, Uty, Utx, Uab);
-
-        time_start = clock();
-        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-        // 3 is before 1.
-        if (a_mode == 3 || a_mode == 4) {
-          CalcRLScore(l_mle_null, param1, beta, se, p_score);
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
-                     logl_H1);
-          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score, logl_H1};
-        sumStat.push_back(SNPs);
-      }
-    }
-  }
-  cout << endl;
-
-  gsl_vector_free(x);
-  gsl_vector_free(x_miss);
-  gsl_vector_free(Utx);
-  gsl_matrix_free(Uab);
-  gsl_vector_free(ab);
-
-  gsl_matrix_free(Xlarge);
-  gsl_matrix_free(UtXlarge);
+  LMM::Analyze(fetch_snp,U,eval,UtW,Uty,W,y,gwasnps);
 
   infile.close();
   infile.clear();
-
-  return;
 }
 
 void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
@@ -1914,10 +1573,10 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
                   vector<pair<size_t, double>> &pos_loglr) {
   double logl_H0, logl_H1, log_lr, lambda0, lambda1;
 
-  gsl_vector *w = gsl_vector_alloc(Uty->size);
-  gsl_matrix *Utw = gsl_matrix_alloc(Uty->size, 1);
-  gsl_matrix *Uab = gsl_matrix_alloc(Uty->size, 6);
-  gsl_vector *ab = gsl_vector_alloc(6);
+  gsl_vector *w = gsl_vector_safe_alloc(Uty->size);
+  gsl_matrix *Utw = gsl_matrix_safe_alloc(Uty->size, 1);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(Uty->size, 6);
+  gsl_vector *ab = gsl_vector_safe_alloc(6);
 
   gsl_vector_set_zero(ab);
   gsl_vector_set_all(w, 1.0);
@@ -2122,8 +1781,8 @@ void CalcLambda(const char func_name, const gsl_vector *eval,
   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -2145,8 +1804,8 @@ void CalcPve(const gsl_vector *eval, const gsl_matrix *UtW,
   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -2172,15 +1831,15 @@ void CalcLmmVgVeBeta(const gsl_vector *eval, const gsl_matrix *UtW,
   size_t n_cvt = UtW->size2, ni_test = UtW->size1;
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_matrix *Uab = gsl_matrix_alloc(ni_test, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
-  gsl_matrix *Pab = gsl_matrix_alloc(n_cvt + 2, n_index);
-  gsl_vector *Hi_eval = gsl_vector_alloc(eval->size);
-  gsl_vector *v_temp = gsl_vector_alloc(eval->size);
-  gsl_matrix *HiW = gsl_matrix_alloc(eval->size, UtW->size2);
-  gsl_matrix *WHiW = gsl_matrix_alloc(UtW->size2, UtW->size2);
-  gsl_vector *WHiy = gsl_vector_alloc(UtW->size2);
-  gsl_matrix *Vbeta = gsl_matrix_alloc(UtW->size2, UtW->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
+  gsl_matrix *Pab = gsl_matrix_safe_alloc(n_cvt + 2, n_index);
+  gsl_vector *Hi_eval = gsl_vector_safe_alloc(eval->size);
+  gsl_vector *v_temp = gsl_vector_safe_alloc(eval->size);
+  gsl_matrix *HiW = gsl_matrix_safe_alloc(eval->size, UtW->size2);
+  gsl_matrix *WHiW = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
+  gsl_vector *WHiy = gsl_vector_safe_alloc(UtW->size2);
+  gsl_matrix *Vbeta = gsl_matrix_safe_alloc(UtW->size2, UtW->size2);
 
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
@@ -2262,13 +1921,13 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_vector *x = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *x_miss = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
-  gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+  gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
   gsl_matrix_view UtW_expand_mat =
       gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
   gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
@@ -2278,24 +1937,24 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
 
   // Start reading genotypes and analyze.
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    safeGetline(infile, line).eof();
     if (t % d_pace == 0 || t == (ns_total - 1)) {
-      ProgressBar("Reading SNPs  ", t, ns_total - 1);
+      ProgressBar("Reading SNPs", t, ns_total - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;
     }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    ch_ptr = strtok_safe((char *)line.c_str(), " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
+    ch_ptr = strtok_safe(NULL, " , \t");
 
     x_mean = 0.0;
     c_phen = 0;
     n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
+      ch_ptr = strtok_safe(NULL, " , \t");
       if (indicator_idv[i] == 0) {
         continue;
       }
@@ -2390,8 +2049,8 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
                           const gsl_matrix *UtW, const gsl_vector *Uty,
                           const gsl_matrix *W, const gsl_vector *y,
                           const gsl_vector *env) {
-  debug_msg("entering");
   string file_bed = file_bfile + ".bed";
+  debug_msg(file_bed);
   ifstream infile(file_bed.c_str(), ios::binary);
   if (!infile) {
     cout << "error reading bed file:" << file_bed << endl;
@@ -2412,12 +2071,12 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 2 + 1) * (n_cvt + 2 + 2) / 2;
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *Utx = gsl_vector_alloc(U->size2);
-  gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
-  gsl_vector *ab = gsl_vector_alloc(n_index);
+  gsl_vector *x = gsl_vector_safe_alloc(U->size1);
+  gsl_vector *Utx = gsl_vector_safe_alloc(U->size2);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(U->size2, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
 
-  gsl_matrix *UtW_expand = gsl_matrix_alloc(U->size1, UtW->size2 + 2);
+  gsl_matrix *UtW_expand = gsl_matrix_safe_alloc(U->size1, UtW->size2 + 2);
   gsl_matrix_view UtW_expand_mat =
       gsl_matrix_submatrix(UtW_expand, 0, 0, U->size1, UtW->size2);
   gsl_matrix_memcpy(&UtW_expand_mat.matrix, UtW);
@@ -2440,7 +2099,7 @@ void LMM::AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
 
   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
     if (t % d_pace == 0 || t == snpInfo.size() - 1) {
-      ProgressBar("Reading SNPs  ", t, snpInfo.size() - 1);
+      ProgressBar("Reading SNPs", t, snpInfo.size() - 1);
     }
     if (indicator_snp[t] == 0) {
       continue;