22 files changed, 1619 insertions, 360 deletions
diff --git a/src/checkpoint.h b/src/checkpoint.h
index c2457d9..05fd836 100644
--- a/src/checkpoint.h
+++ b/src/checkpoint.h
@@ -1,7 +1,7 @@
 /*
     Checkpoints for pangemma propagators
 
-    Copyright © 2015, Pjotr Prins
+    Copyright © 2025, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -26,6 +26,14 @@ using namespace std;
 
 extern string checkpoint_name;
 
+/*
+    Checkpoint functions display a message in debug mode when
+    reached. If the (unique) msg (tag) is equal to the global
+    checkpoint_name the program will stop. The idea is that we can
+    stop computation at any checkpoint. Later we can write the code to
+    restart at a checkpoint.
+ */
+
 void checkpoint_run(string msg, string filename, string srcfilename, int line, string funcname);
 
 #define checkpoint(msg, fname)						\
diff --git a/src/debug.cpp b/src/debug.cpp
index 0aa4bfc..6cefcc7 100644
--- a/src/debug.cpp
+++ b/src/debug.cpp
@@ -139,6 +139,7 @@ inline int fedisableexcept(unsigned int excepts)
 
 #endif
 
+/*
 void enable_segfpe() {
   if (!is_fpe_check_mode() || is_legacy_mode()) return;
   #ifdef __GNUC__
@@ -159,6 +160,9 @@ void disable_segfpe() {
     #endif
   #endif
 }
+*/
+
+#ifndef NDEBUG
 
 void write(const char *s, const char *msg) {
   if (!is_debug_data_mode() && !is_debug_dump_mode()) return;
@@ -230,6 +234,8 @@ void write(const gsl_matrix *m, const char *msg) {
           cout << "}" << endl;
 }
 
+#endif // NDEBUG
+
 /*
   Helper function to make sure gsl allocations do their job because
   gsl_matrix_alloc does not initiatize values (behaviour that changed
diff --git a/src/debug.h b/src/debug.h
index 0489a81..a32bfd2 100644
--- a/src/debug.h
+++ b/src/debug.h
@@ -60,11 +60,22 @@ void disable_segfpe();
   { auto x = m * n;                                      \
     enforce_msg(x / m == n, "multiply integer overflow"); }
 
+#ifndef NDEBUG
+
 void write(const double d, const char *msg = "");
 void write(const char *s, const char *msg = "");
 void write(const gsl_vector *v, const char *msg = "");
 void write(const gsl_matrix *m, const char *msg = "");
 
+#else // NDEBUG
+
+inline void write(const double d, const char *msg = "") {};
+inline void write(const char *s, const char *msg = "") {};
+inline void write(const gsl_vector *v, const char *msg = "") {};
+inline void write(const gsl_matrix *m, const char *msg = "") {};
+
+#endif // NDEBUG
+
 gsl_matrix *gsl_matrix_safe_alloc(size_t rows,size_t cols);
 int gsl_matrix_safe_memcpy (gsl_matrix *dest, const gsl_matrix *src);
 void gsl_matrix_safe_free (gsl_matrix *v);
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 6c2b3f4..a39d67a 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -62,6 +62,7 @@ extern "C" {
 #include "vc.h"
 #include "debug.h"
 #include "version.h"
+#include <guile_api.h>
 
 using namespace std;
 
@@ -81,10 +82,12 @@ void gemma_gsl_error_handler (const char * reason,
 #include <openblas_config.h>
 #endif
 
+using namespace gemmaguile;
+
 void GEMMA::PrintHeader(void) {
 
   cout <<
-    "GEMMA forked executable --- part of PanGEMMA " << version << " (" << date << ") by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl;
+      "Pangemma " << version << " --- GEMMA 0.98.5 compatible executable (" << date << ") with" <<  OPENBLAS_VERSION << "and guile " << global_guile_version() << endl << "by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl ;
   return;
 }
 
@@ -1653,7 +1656,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
   clock_t time_begin, time_start;
   time_begin = clock();
 
-  if (is_check_mode()) enable_segfpe(); // fast NaN checking by default
+  // if (is_check_mode()) enable_segfpe(); // fast NaN checking by default
 
   // Read Files.
   cout << "Reading Files ... " << endl;
@@ -1908,32 +1911,52 @@ void GEMMA::BatchRun(PARAM &cPar) {
   }
 
   // Generate Kinship matrix (optionally using LOCO)
-  if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) {
+  // if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) {
+  if (is_kinship_compute(cPar.a_mode)) {
     cout << "Calculating Relatedness Matrix ... " << endl;
 
-    gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
-    enforce_msg(G, "allocate G"); // just to be sure
+    if (cPar.is_mdb) {
+      time_start = clock();
+      auto K = cPar.MdbCalcKin();
+      cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+      if (cPar.a_mode == M_KIN) {
+        cPar.WriteMatrix(K, "cXX");
+      } else { // M_KIN2
+        cPar.WriteMatrix(K, "sXX");
+      }
+
+      gsl_matrix_safe_free(K);
+      return;
+    }
+
+    enforce(cPar.ni_total > 0);
+    gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
+    enforce_msg(K, "allocate G"); // just to be sure
 
     time_start = clock();
 
-    cPar.CalcKin(G);
+    if (cPar.error == true) {
+      cout << "error! failed to prepare for calculating relatedness matrix. " << endl;
+      return;
+    }
+    cPar.CalcKin(K);
 
     cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
     if (cPar.error == true) {
-      cout << "error! fail to calculate relatedness matrix. " << endl;
+      cout << "error! failed to calculate relatedness matrix. " << endl;
       return;
     }
 
     // Now we have the Kinship matrix test it
-    validate_K(G);
+    validate_K(K);
 
     if (cPar.a_mode == M_KIN) {
-      cPar.WriteMatrix(G, "cXX");
+      cPar.WriteMatrix(K, "cXX");
     } else { // M_KIN2
-      cPar.WriteMatrix(G, "sXX");
+      cPar.WriteMatrix(K, "sXX");
     }
 
-    gsl_matrix_safe_free(G);
+    gsl_matrix_safe_free(K);
   }
 
   // Compute the LDSC weights (not implemented yet)
@@ -1943,7 +1966,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     VARCOV cVarcov;
     cVarcov.CopyFromParam(cPar);
 
-    if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
+    // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
 
     if (!cPar.file_bfile.empty()) {
       cVarcov.AnalyzePlink();
@@ -2058,7 +2081,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     VARCOV cVarcov;
     cVarcov.CopyFromParam(cPar);
 
-    if (is_check_mode()) disable_segfpe(); // fast NaN checking for now
+    // if (is_check_mode()) disable_segfpe(); // fast NaN checking for now
 
     if (!cPar.file_bfile.empty()) {
       cVarcov.AnalyzePlink();
@@ -2569,19 +2592,23 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == M_LMM9 ||
       cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen
     write(cPar.a_mode, "Mode");
-    gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+    auto n_ph    = cPar.n_ph;                                      // # of trait values
+    auto ni_test = cPar.ni_test;                                   // # of individuals to test
+    enforce(n_ph > 0);
+    enforce(ni_test > 0);
+    gsl_matrix *Y = gsl_matrix_safe_alloc(ni_test, n_ph);          // Y is ind x phenotypes
     enforce_msg(Y, "allocate Y"); // just to be sure
-    gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
-    gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c
-                                                          // matrix
-    gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2);
-    gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
-    gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
-    gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2);
-    gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2);
-    gsl_vector *eval = gsl_vector_calloc(Y->size1);
-    gsl_vector *env = gsl_vector_safe_alloc(Y->size1);
-    gsl_vector *weight = gsl_vector_safe_alloc(Y->size1);
+    gsl_matrix *W = gsl_matrix_safe_alloc(ni_test, cPar.n_cvt);    // W is ind x covariates
+    gsl_matrix *B = gsl_matrix_safe_alloc(n_ph, W->size2);         // B is a d by c (effect size)
+    gsl_matrix *se_B = gsl_matrix_safe_alloc(n_ph, W->size2);      // se
+    enforce(ni_test > 0);
+    gsl_matrix *K = gsl_matrix_safe_alloc(ni_test, ni_test);       // Kinship aka as G/GRM in the equation ind x ind
+    gsl_matrix *U = gsl_matrix_safe_alloc(ni_test, ni_test);
+    gsl_matrix *UtW = gsl_matrix_calloc(ni_test, W->size2);        // Transformed ind x covariates
+    gsl_matrix *UtY = gsl_matrix_calloc(ni_test, n_ph);            // Transformed ind x phenotypes
+    gsl_vector *eval = gsl_vector_calloc(ni_test);                 // eigen values
+    gsl_vector *env = gsl_vector_safe_alloc(ni_test);              // E environment
+    gsl_vector *weight = gsl_vector_safe_alloc(ni_test);           // weights
     debug_msg("Started on LMM");
     assert_issue(is_issue(26), UtY->data[0] == 0.0);
 
@@ -2592,38 +2619,38 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.CopyGxe(env);
     }
 
-    // read relatedness matrix G
+    // read relatedness matrix K
     if (!(cPar.file_kin).empty()) {
       ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
-                   cPar.k_mode, cPar.error, G);
+                   cPar.k_mode, cPar.error, K);
       debug_msg("Read K/GRM file");
       if (cPar.error == true) {
         cout << "error! fail to read kinship/relatedness file. " << endl;
         return;
       }
 
-      // center matrix G
-      CenterMatrix(G);
-      validate_K(G);
-      write(G, "G");
+      // center matrix K
+      CenterMatrix(K);
+      validate_K(K);
+      write(K, "K");
 
       // is residual weights are provided, then
       if (!cPar.file_weight.empty()) {
         cPar.CopyWeight(weight);
         double d, wi, wj;
-        for (size_t i = 0; i < G->size1; i++) {
+        for (size_t i = 0; i < K->size1; i++) {
           wi = gsl_vector_get(weight, i);
-          for (size_t j = i; j < G->size2; j++) {
+          for (size_t j = i; j < K->size2; j++) {
             wj = gsl_vector_get(weight, j);
-            d = gsl_matrix_get(G, i, j);
+            d = gsl_matrix_get(K, i, j);
             if (wi <= 0 || wj <= 0) {
               d = 0;
             } else {
               d /= safe_sqrt(wi * wj);
             }
-            gsl_matrix_set(G, i, j, d);
+            gsl_matrix_set(K, i, j, d);
             if (j != i) {
-              gsl_matrix_set(G, j, i, d);
+              gsl_matrix_set(K, j, i, d);
             }
           }
         }
@@ -2634,9 +2661,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
       time_start = clock();
 
       if (cPar.a_mode == M_EIGEN) {
-        cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
+        cPar.trace_G = EigenDecomp_Zeroed(K, U, eval, 0);
       } else {
-        cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
+        cPar.trace_G = EigenDecomp_Zeroed(K, U, eval, 0);
       }
       // write(eval,"eval");
 
@@ -2693,8 +2720,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
       LMM cLmm;
       cLmm.CopyFromParam(cPar);
 
-      gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
-      gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+      gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get a single phenotype column
+      gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and transformed column
 
       cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
                        &Y_col.vector); // y is the predictor, not the phenotype
@@ -2811,34 +2838,40 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
           // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
 
-          gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
-          gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+          gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get pheno column
+          gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and its transformed
           write(&Y_col.vector, "Y_col");
 
-          if (!cPar.file_bfile.empty()) {
-            // PLINK analysis
-            if (cPar.file_gxe.empty()) {
-              cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
-                                &Y_col.vector, cPar.setGWASnps);
-            }
-            else {
-              cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
-                                   &Y_col.vector, env);
-            }
+          if (cPar.is_mdb) {
+            cLmm.mdb_calc_gwa(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, cPar.loco);
+            cLmm.CopyToParam(cPar);
           }
           else {
-            // BIMBAM analysis
-
-            if (cPar.file_gxe.empty()) {
-              cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
-                                 &Y_col.vector, cPar.setGWASnps);
-            } else {
-              cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
-                                    &Y_col.vector, env);
+            if (!cPar.file_bfile.empty()) {
+              // PLINK analysis
+              if (cPar.file_gxe.empty()) {
+                cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
+                                  &Y_col.vector, cPar.setGWASnps);
+              }
+              else {
+                cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
+                                     &Y_col.vector, env);
+              }
+            }
+            else {
+              // BIMBAM analysis
+
+              if (cPar.file_gxe.empty()) {
+                cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
+                                   &Y_col.vector, cPar.setGWASnps);
+              } else {
+                cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
+                                      &Y_col.vector, env);
+              }
             }
+            cLmm.WriteFiles(); // write output files
+            cLmm.CopyToParam(cPar);
           }
-          cLmm.WriteFiles();
-          cLmm.CopyToParam(cPar);
         } else {
           debug_msg("fit mvLMM (multiple phenotypes)");
           MVLMM cMvlmm;
@@ -2873,7 +2906,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     gsl_matrix_safe_free(W);
     gsl_matrix_warn_free(B); // sometimes unused
     gsl_matrix_warn_free(se_B);
-    gsl_matrix_warn_free(G);
+    gsl_matrix_warn_free(K);
     gsl_matrix_safe_free(U);
     gsl_matrix_safe_free(UtW);
     gsl_matrix_safe_free(UtY);
@@ -2898,7 +2931,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     // run bvsr if rho==1
     if (cPar.rho_min == 1 && cPar.rho_max == 1) {
       // read genotypes X (not UtX)
-      cPar.ReadGenotypes(UtX, G, false);
+      cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
       // perform BSLMM analysis
       BSLMM cBslmm;
@@ -2916,7 +2949,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
       // read relatedness matrix G
       if (!(cPar.file_kin).empty()) {
-        cPar.ReadGenotypes(UtX, G, false);
+        cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
         // read relatedness matrix G
         ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
@@ -2930,7 +2963,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
         CenterMatrix(G);
         validate_K(G);
       } else {
-        cPar.ReadGenotypes(UtX, G, true);
+        cPar.ReadBIMBAMGenotypes(UtX, G, true);
       }
 
       // eigen-decomposition and calculate trace_G
@@ -3008,7 +3041,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       // run bvsr if rho==1
       if (cPar.rho_min == 1 && cPar.rho_max == 1) {
         // read genotypes X (not UtX)
-        cPar.ReadGenotypes(UtX, G, false);
+        cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
         // perform BSLMM analysis
         BSLMM cBslmm;
@@ -3027,7 +3060,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         // read relatedness matrix G
         if (!(cPar.file_kin).empty()) {
-          cPar.ReadGenotypes(UtX, G, false);
+          cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
           // read relatedness matrix G
           ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
@@ -3042,7 +3075,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
           validate_K(G);
 
         } else {
-          cPar.ReadGenotypes(UtX, G, true);
+          cPar.ReadBIMBAMGenotypes(UtX, G, true);
         }
 
         // eigen-decomposition and calculate trace_G
@@ -3168,8 +3201,8 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
   }
 
   outfile << "##" << endl;
-  outfile << "## GEMMA Version    = " << version << " (" << date << ")" << endl;
-  outfile << "## Build profile    = " << GEMMA_PROFILE << endl ;
+  outfile << "## PANGEMMA Version = " << version << " (" << date << ")" << endl;
+  // outfile << "## Build profile    = " << GEMMA_PROFILE << endl ;
   #ifdef __GNUC__
   outfile << "## GCC version      = " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__ << endl;
   #endif
diff --git a/src/gemma.h b/src/gemma.h
index b34a958..fa7c24d 100644
--- a/src/gemma.h
+++ b/src/gemma.h
@@ -64,6 +64,7 @@ public:
   void Assign(int argc, char **argv, PARAM &cPar);
   void BatchRun(PARAM &cPar);
   void WriteLog(int argc, char **argv, PARAM &cPar);
+  inline bool is_kinship_compute(uint a_mode) {  return (a_mode == M_KIN || a_mode == M_KIN2); }
 };
 
 #endif
diff --git a/src/gemma_api.cpp b/src/gemma_api.cpp
new file mode 100644
index 0000000..77c1d56
--- /dev/null
+++ b/src/gemma_api.cpp
@@ -0,0 +1,33 @@
+// Testing bindings, see README.md and
+// https://www.gnu.org/savannah-checkouts/gnu/guile/docs/docs-2.0/guile-ref/Dynamic-Types.html
+
+#include <stdio.h>
+#include <libguile.h>
+// #include <libguile/boolean.h>
+// #include <libguile/numbers.h>
+
+// extern SCM my_incrementing_function (SCM a, SCM flag);
+
+// extern "C" SCM my_ping(SCM i);
+
+static SCM my_ping(SCM i) {
+    return i;
+}
+
+
+SCM my_incrementing_function (SCM a, SCM flag)
+{
+  SCM result;
+
+  if (scm_is_true (flag))
+    result = scm_sum (a, scm_from_int (1));
+  else
+    result = a;
+
+  return result;
+}
+
+
+extern "C" void init_module() {
+    scm_c_define_gsubr("my-ping", 1, 0, 0, (scm_t_subr)my_ping);
+}
diff --git a/src/gemma_io.cpp b/src/gemma_io.cpp
index f5a79a2..e630f64 100644
--- a/src/gemma_io.cpp
+++ b/src/gemma_io.cpp
@@ -2,7 +2,7 @@
     Genome-wide Efficient Mixed Model Association (GEMMA)
     Copyright © 2011-2017, Xiang Zhou
     Copyright © 2017, Peter Carbonetto
-    Copyright © 2017-2020, Pjotr Prins
+    Copyright © 2017-2025, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -41,6 +41,7 @@
 #include "gsl/gsl_linalg.h"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
+#include <lmdb++.h>
 
 #include "checkpoint.h"
 #include "debug.h"
@@ -61,7 +62,7 @@ void ProgressBar(string str, double p, double total, double ratio) {
   const double progress = (100.0 * p / total);
   const uint barsize = (int)(progress / 2.0); // characters
   // cout << barsize << endl;
-  // cout << str << " ";
+  cout << str << " ";
   // cout << p << "/" << total << endl;
   assert(barsize < 101); // corrupted data somehow
   if (barsize > 0) {
@@ -153,6 +154,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) {
 // Read SNP file. A single column of SNP names.
 bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
   debug_msg("enter ReadFile_snps");
+  checkpoint("read-file-snps",file_snps);
   setSnps.clear();
 
   igzstream infile(file_snps.c_str(), igzstream::in);
@@ -182,6 +184,7 @@ bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
 bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
   debug_msg("entered");
   setSnps.clear();
+  checkpoint("read-file-header",file_snps);
 
   igzstream infile(file_snps.c_str(), igzstream::in);
   if (!infile) {
@@ -239,6 +242,7 @@ bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps) {
 // Read log file.
 bool ReadFile_log(const string &file_log, double &pheno_mean) {
   debug_msg("ReadFile_log");
+  checkpoint("read-file-log",file_log);
   ifstream infile(file_log.c_str(), ifstream::in);
   if (!infile) {
     cout << "error! fail to open log file: " << file_log << endl;
@@ -282,6 +286,8 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
                    map<string, long int> &mapRS2bp,
                    map<string, double> &mapRS2cM) {
   debug_msg("ReadFile_anno");
+  checkpoint("read-file-anno",file_anno);
+
   mapRS2chr.clear();
   mapRS2bp.clear();
 
@@ -345,6 +351,7 @@ bool ReadFile_anno(const string &file_anno, map<string, string> &mapRS2chr,
 bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
                      vector<double> &pheno, const int &p_column) {
   debug_msg("entered");
+  checkpoint("read-file-column",file_pheno);
   indicator_idv.clear();
   pheno.clear();
 
@@ -389,6 +396,7 @@ bool ReadFile_pheno(const string &file_pheno,
                     vector<vector<double>> &pheno,
                     const vector<size_t> &p_column) {
   debug_msg("entered");
+  checkpoint("read-file-pheno",file_pheno);
   indicator_pheno.clear();
   pheno.clear();
 
@@ -398,35 +406,36 @@ bool ReadFile_pheno(const string &file_pheno,
     return false;
   }
 
-  string line;
-  char *ch_ptr;
-
-  string id;
+  // string id;
   double p;
 
   vector<double> pheno_row;
-  vector<int> ind_pheno_row;
+
+  string line;
+  char *ch_ptr;
+
+  vector<int> indicator_pheno_row;
 
   size_t p_max = *max_element(p_column.begin(), p_column.end());
   map<size_t, size_t> mapP2c;
   for (size_t i = 0; i < p_column.size(); i++) {
     mapP2c[p_column[i]] = i;
     pheno_row.push_back(-9);
-    ind_pheno_row.push_back(0);
+    indicator_pheno_row.push_back(0);
   }
 
   while (!safeGetline(infile, line).eof()) {
     ch_ptr = strtok((char *)line.c_str(), " ,\t");
     size_t i = 0;
     while (i < p_max) {
-      enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match phenotypes in geno file");
+      enforce_msg(ch_ptr,"Number of phenotypes in pheno file do not match selected columns");
       if (mapP2c.count(i + 1) != 0) {
         if (strcmp(ch_ptr, "NA") == 0) {
-          ind_pheno_row[mapP2c[i + 1]] = 0;
+          indicator_pheno_row[mapP2c[i + 1]] = 0; // skip this trait row
           pheno_row[mapP2c[i + 1]] = -9;
         } else {
           p = atof(ch_ptr);
-          ind_pheno_row[mapP2c[i + 1]] = 1;
+          indicator_pheno_row[mapP2c[i + 1]] = 1; // use this trait row
           pheno_row[mapP2c[i + 1]] = p;
         }
       }
@@ -434,7 +443,7 @@ bool ReadFile_pheno(const string &file_pheno,
       ch_ptr = strtok(NULL, " ,\t");
     }
 
-    indicator_pheno.push_back(ind_pheno_row);
+    indicator_pheno.push_back(indicator_pheno_row);
     pheno.push_back(pheno_row);
   }
 
@@ -448,6 +457,7 @@ bool ReadFile_pheno(const string &file_pheno,
 bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
                   vector<vector<double>> &cvt, size_t &n_cvt) {
   debug_msg("entered");
+  checkpoint("read-file-cvt",file_cvt);
   indicator_cvt.clear();
 
   ifstream infile(file_cvt.c_str(), ifstream::in);
@@ -515,6 +525,7 @@ bool ReadFile_cvt(const string &file_cvt, vector<int> &indicator_cvt,
 
 // Read .bim file.
 bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
+  checkpoint("read-file-bim",file_bim);
   debug_msg("entered");
   snpInfo.clear();
 
@@ -563,6 +574,7 @@ bool ReadFile_bim(const string &file_bim, vector<SNPINFO> &snpInfo) {
 bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
                   vector<vector<double>> &pheno, map<string, int> &mapID2num,
                   const vector<size_t> &p_column) {
+  checkpoint("read-file-fam",file_fam);
   debug_msg("entered");
   indicator_pheno.clear();
   pheno.clear();
@@ -639,16 +651,55 @@ bool ReadFile_fam(const string &file_fam, vector<vector<int>> &indicator_pheno,
   return true;
 }
 
+
 // Read bimbam mean genotype file, the first time, to obtain #SNPs for
 // analysis (ns_test) and total #SNP (ns_total).
-bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
-                   const gsl_matrix *W, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp, const double &maf_level,
-                   const double &miss_level, const double &hwe_level,
-                   const double &r2_level, map<string, string> &mapRS2chr,
-                   map<string, long int> &mapRS2bp,
-                   map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
-                   size_t &ns_test) {
+/*
+## Modified (Mutated) Parameters:
+
+1. **`indicator_idv`** (vector<int>&)
+   - **Not modified** - only read from to determine which individuals to include
+
+2. **`indicator_snp`** (vector<int>&)
+   - **Modified**: Cleared at the start (`indicator_snp.clear()`)
+   - Populated with 0s and 1s to indicate whether each SNP passes filters
+   - One entry per SNP in the genotype file
+
+3. **`mapRS2chr`** (map<string, string>&)
+   - **Not modified** - only read from to get chromosome info
+
+4. **`mapRS2bp`** (map<string, long int>&)
+   - **Not modified** - only read from to get base pair positions
+
+5. **`mapRS2cM`** (map<string, double>&)
+   - **Not modified** - only read from to get centimorgan positions
+
+6. **`snpInfo`** (vector<SNPINFO>&)
+   - **Modified**: Cleared at the start (`snpInfo.clear()`)
+   - Populated with SNPINFO structures for each SNP containing metadata (chr, rs, position, alleles, missingness, MAF, etc.)
+
+7. **`ns_test`** (size_t&)
+   - **Modified**: Set to 0 initially
+   - Incremented for each SNP that passes all filters
+   - Returns the count of SNPs that will be used for testing
+
+## Summary of Mutated Outputs:
+- **`indicator_snp`**: Binary indicators for which SNPs passed filtering
+- **`snpInfo`**: Complete metadata for all SNPs in the file
+- **`ns_test`**: Count of SNPs passing filters
+
+The function returns a `bool` indicating success/failure, but the real outputs are these three modified reference parameters.
+*/
+
+bool ReadFile_bimbam_geno(const string &file_geno, const set<string> &setSnps,
+                          const gsl_matrix *W, const vector<int> &indicator_idv,
+                          vector<int> &indicator_snp, const double &maf_level,
+                          const double &miss_level, const double &hwe_level,
+                          const double &r2_level, map<string, string> &mapRS2chr,
+                          map<string, long int> &mapRS2bp,
+                          map<string, double> &mapRS2cM, vector<SNPINFO> &snpInfo,
+                          size_t &ns_test) {
+  checkpoint("read-file-geno",file_geno);
   debug_msg("entered");
   indicator_snp.clear();
   snpInfo.clear();
@@ -661,7 +712,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
 
   gsl_vector *genotype = gsl_vector_safe_alloc(W->size1);
   gsl_vector *genotype_miss = gsl_vector_safe_alloc(W->size1);
-  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2);
+  gsl_matrix *WtW = gsl_matrix_safe_alloc(W->size2, W->size2); // Covariates
   gsl_matrix *WtWi = gsl_matrix_safe_alloc(W->size2, W->size2);
   gsl_vector *Wtx = gsl_vector_safe_alloc(W->size2);
   gsl_vector *WtWiWtx = gsl_vector_safe_alloc(W->size2);
@@ -687,9 +738,8 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   double cM;
   size_t file_pos;
 
-  double maf, geno, geno_old;
+  double geno, geno_old;
   size_t n_miss;
-  size_t n_0, n_1, n_2;
   double min_g = std::numeric_limits<float>::max();
   double max_g = std::numeric_limits<float>::min();
 
@@ -700,6 +750,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   for (int i = 0; i < ni_total; ++i) {
     ni_test += indicator_idv[i];
   }
+  // assert(ni_test == ni_total); // we use indicator_idv?
   ns_test = 0;
 
   file_pos = 0;
@@ -709,13 +760,14 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   //      cerr << key << endl;
   // }
   while (!safe_get_line(infile, line).eof()) {
-    // cout << line;
+    // fetch SNP annotation
     ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen);
     rs = ch_ptr;
     ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
     minor = ch_ptr;
     ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
     major = ch_ptr;
+    // genotypes get read below
 
     if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
       // if SNP in geno but not in -snps we add an missing value
@@ -745,21 +797,21 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     }
 
     // Start on a new marker/SNP
-    maf = 0;
     n_miss = 0;
     flag_poly = 0;
     geno_old = -9;
-    n_0 = 0;
-    n_1 = 0;
-    n_2 = 0;
     c_idv = 0;
     gsl_vector_set_zero(genotype_miss);
+    double maf = 0.0;
+    size_t n_0=0,n_1=0,n_2=0;
+
     auto infilen = file_geno.c_str();
-    for (int i = 0; i < ni_total; ++i) {
+    for (int i = 0; i < ni_total; ++i) { // read genotypes
       ch_ptr = strtok_safe2(NULL, " ,\t",(string("To many individuals/strains/genometypes in pheno file for ")+infilen).c_str());
       if (indicator_idv[i] == 0)
         continue;
 
+      // annotate missing genotypes
       enforce_msg(ch_ptr,"Problem reading geno file (not enough genotypes in line)");
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(genotype_miss, c_idv, 1);
@@ -769,19 +821,12 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
       }
 
       geno = atof(ch_ptr);
-      if (geno >= 0 && geno <= 0.5) {
-        n_0++;
-      }
-      if (geno > 0.5 && geno < 1.5) {
-        n_1++;
-      }
-      if (geno >= 1.5 && geno <= 2.0) {
-        n_2++;
-      }
 
       gsl_vector_set(genotype, c_idv, geno);
-      if (geno < min_g) min_g = geno;
-      if (geno > max_g) max_g = geno;
+      if (geno < min_g)
+        min_g = geno;
+      if (geno > max_g)
+        max_g = geno;
 
       // going through genotypes with 0.0 < geno < 2.0
       if (flag_poly == 0) { // first init in marker
@@ -792,10 +837,20 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
         flag_poly = 1;      // genotypes differ
       }
 
+      // compute ratios
+      if (hwe_level != 0 && maf_level != -1) {
+        if (geno >= 0 && geno <= 0.5)
+          n_0++;
+        if (geno > 0.5 && geno < 1.5)
+          n_1++;
+        if (geno >= 1.5 && geno <= 2.0)
+          n_2++;
+      }
       maf += geno;
 
       c_idv++;
     }
+
     maf /= 2.0 * (double)(ni_test - n_miss);
 
     SNPINFO sInfo = {chr,    rs,
@@ -832,8 +887,6 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
         continue;
       }
     }
-
-
     // -r2 flag
     for (size_t i = 0; i < genotype->size; ++i) {
       if (gsl_vector_get(genotype_miss, i) == 1) {
@@ -841,7 +894,6 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
         gsl_vector_set(genotype, i, geno);
       }
     }
-
     gsl_blas_dgemv(CblasTrans, 1.0, W, genotype, 0.0, Wtx);
     gsl_blas_dgemv(CblasNoTrans, 1.0, WtWi, Wtx, 0.0, WtWiWtx);
     gsl_blas_ddot(genotype, genotype, &v_x);
@@ -863,7 +915,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
   if (max_g != 2.0)
     warning_msg("The maximum genotype value is not 2.0 - this is not the BIMBAM standard and will skew l_lme and effect sizes");
 
-  gsl_vector_free(genotype);
+  gsl_vector_free(genotype); // we don't hang on to the genotypes
   gsl_vector_free(genotype_miss);
   gsl_matrix_free(WtW);
   gsl_matrix_free(WtWi);
@@ -885,6 +937,7 @@ bool ReadFile_bed(const string &file_bed, const set<string> &setSnps,
                   const double &maf_level, const double &miss_level,
                   const double &hwe_level, const double &r2_level,
                   size_t &ns_test) {
+  checkpoint("read-file-bed",file_bed);
   debug_msg("entered");
   indicator_snp.clear();
   size_t ns_total = snpInfo.size();
@@ -1193,6 +1246,7 @@ void Plink_ReadOneSNP(const int pos, const vector<int> &indicator_idv,
 void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
                   map<string, int> &mapID2num, const size_t k_mode, bool &error,
                   gsl_matrix *G) {
+  checkpoint("read-file-kin",file_kin);
   debug_msg("entered");
   igzstream infile(file_kin.c_str(), igzstream::in);
   if (!infile) {
@@ -1296,7 +1350,7 @@ void ReadFile_kin(const string &file_kin, vector<int> &indicator_idv,
 
   infile.close();
   infile.clear();
-  checkpoint("read-kinship-file",file_kin);
+  checkpoint("end-read-file-kin",file_kin);
 
   return;
 }
@@ -1422,6 +1476,197 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
   return;
 }
 
+
+// Read bimbam mean genotype file and calculate kinship matrix.
+gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string loco) {
+  checkpoint("mdb-kin",file_geno);
+  bool is_loco = !loco.empty();
+
+  // Convert loco string to what we use in the chrpos index
+  uint8_t loco_chr = 0;
+  if (is_loco) {
+    if (loco == "X") {
+      loco_chr = CHR_X;
+    } else if (loco == "Y") {
+      loco_chr = CHR_Y;
+    } else if (loco == "M") {
+      loco_chr = CHR_M;
+    } else {
+      try {
+        loco_chr = static_cast<uint8_t>(stoi(loco));
+      } catch (...) {
+        loco_chr = 0;
+      }
+    }
+  }
+
+  /* Create and open the LMDB environment: */
+  auto env = lmdb::env::create();
+
+  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL); /* 10 GiB */
+  env.set_max_dbs(10);
+  env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  string format;
+
+  size_t ni_total = 0, num_markers = 0;
+  {
+    auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+    auto info = lmdb::dbi::open(rtxn, "info");
+
+    string_view meta;
+    if (info.get(rtxn, "meta", meta)) {
+      cout << meta << endl;
+    } else {
+      cout << "meta not found!" << endl;
+    }
+    std::string_view view_int;
+    info.get(rtxn, "numsamples", view_int);
+    ni_total = lmdb::from_sv<size_t>(view_int);
+    string_view info_key,info_value;
+    info.get(rtxn,"format",info_value);
+    format = string(info_value);
+
+    MDB_stat stat;
+    mdb_stat(rtxn, info, &stat);
+    assert(stat.ms_entries == 5);
+  }
+
+  auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+
+  auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
+
+  MDB_stat stat;
+  mdb_stat(rtxn, geno_mdb, &stat);
+  // cout << "Number of records: " << stat.ms_entries << endl;
+  num_markers = stat.ms_entries;
+
+  gsl_matrix *matrix_kin = gsl_matrix_safe_alloc(ni_total,ni_total);
+  gsl_matrix_set_zero(matrix_kin);
+  gsl_vector *geno = gsl_vector_safe_alloc(ni_total);
+  gsl_vector *geno_miss = gsl_vector_safe_alloc(ni_total);
+
+  // Xlarge contains inds x markers
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(ni_total, K_BATCH_SIZE);
+  enforce_msg(Xlarge, "allocate Xlarge ni_total x K_BATCH_SIZE");
+  if (is_check_mode())
+    gsl_matrix_set_zero(Xlarge);
+
+  // [ ] check XLarge is zeroed properly
+  // [ ] handle missing data
+
+  // For every marker read the genotype x inds
+  size_t ns_test = 0;
+  size_t display_pace = 1000;
+
+  cout << "## number of total individuals = " << ni_total << endl;
+  cout << "## number of analyzed individuals = " << ni_total << endl;
+  cout << "## number of analyzed SNPs/var = " << num_markers << endl;
+
+  auto cursor = lmdb::cursor::open(rtxn, geno_mdb);
+
+  auto mdb_fetch = MDB_FIRST;
+  for (size_t t = 0; t < num_markers; ++t) {
+    string line;
+    if (t % display_pace == 0) {
+      ProgressBar("Reading SNPs", t, num_markers - 1);
+    }
+    string_view key, value;
+    cursor.get(key, value, mdb_fetch);
+    mdb_fetch = MDB_NEXT;
+
+    const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data());
+    auto chr = static_cast<uint8_t>(data[1]);
+
+    if (is_loco && loco_chr == chr)
+      continue;
+
+    // ---- Depending on the format we get different buffers - currently float and byte are supported:
+    vector<double> gs;
+    if (format == "Gb") {
+      size_t num_bytes = value.size() / sizeof(uint8_t);
+      assert(num_bytes == ni_total);
+      auto size = num_bytes;
+      const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data());
+      gs.reserve(size);
+      for (size_t i = 0; i < size; ++i) {
+        double g = static_cast<double>(gs_bbuf[i])/127.0;
+        gs.push_back(g);
+      }
+    } else {
+      size_t num_floats = value.size() / sizeof(float);
+      assert(num_floats == ni_total);
+      auto size = num_floats;
+      const float* gs_fbuf = reinterpret_cast<const float*>(value.data());
+      gs.reserve(size);
+      for (size_t i = 0; i < size; ++i) {
+        gs.push_back(static_cast<double>(gs_fbuf[i]));
+      }
+    }
+
+    size_t n_miss = 0;
+    double geno_mean = 0.0;
+    double geno_var = 0.0;
+    gsl_vector_set_all(geno_miss, 0);
+
+    for (size_t i = 0; i < ni_total; ++i) {
+      double d = gs[i];
+      gsl_vector_set(geno, i, d);
+      gsl_vector_set(geno_miss, i, 1);
+      geno_mean += d;
+      geno_var += d * d;
+    }
+
+    geno_mean /= (double)(ni_total - n_miss);
+    geno_var += geno_mean * geno_mean * (double)n_miss;
+    geno_var /= (double)ni_total;
+    geno_var -= geno_mean * geno_mean;
+
+    // impute missing values by plugging in the mean
+    for (size_t i = 0; i < ni_total; ++i) {
+      if (gsl_vector_get(geno_miss, i) == 0) {
+        gsl_vector_set(geno, i, geno_mean);
+      }
+    }
+
+    // subtract the mean (centering genotype values)
+    gsl_vector_add_constant(geno, -1.0 * geno_mean);
+
+    // scale the genotypes
+    if (k_mode == 2 && geno_var != 0) { // some confusion here, -gk 2
+                                        // flag does this
+      gsl_vector_scale(geno, 1.0 / sqrt(geno_var));
+    }
+
+    // set the SNP column ns_test to copy geno into the compute matrix Xlarge
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % K_BATCH_SIZE);
+    enforce_gsl(gsl_vector_memcpy(&Xlarge_col.vector, geno));
+
+    ns_test++;
+
+    // Every K_BATCH_SIZE rows batch compute kinship matrix and return
+    // by adding to matrix_kin
+    if (ns_test % K_BATCH_SIZE == 0) {
+      fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+      gsl_matrix_set_zero(Xlarge);
+      write(matrix_kin,"K updated");
+    }
+  }
+  if (ns_test % K_BATCH_SIZE != 0) { // compute last batch
+    fast_eigen_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
+    write(matrix_kin,"K updated");
+  }
+  ProgressBar("Reading SNPs", num_markers, num_markers);
+  cout << endl;
+
+  enforce_gsl(gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test));
+
+  gsl_vector_free(geno);
+  gsl_vector_free(geno_miss);
+  gsl_matrix_free(Xlarge);
+
+  return matrix_kin;
+}
+
 // Read bimbam mean genotype file and calculate kinship matrix.
 bool BimbamKin(const string file_geno, const set<string> ksnps,
                vector<int> &indicator_snp, const int k_mode,
@@ -1431,6 +1676,7 @@ bool BimbamKin(const string file_geno, const set<string> ksnps,
   auto infilen = file_geno.c_str();
   igzstream infile(infilen, igzstream::in);
   enforce_msg(infilen, "error reading genotype file");
+  checkpoint("bimbam-kin",file_geno);
 
   size_t n_miss;
   double geno_mean, geno_var;
@@ -1752,12 +1998,14 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
 bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                    const bool calc_K) {
+  checkpoint("read-file-geno",file_geno);
   debug_msg("entered");
   igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
     cout << "error reading genotype file:" << file_geno << endl;
     return false;
   }
+  checkpoint("read-geno-file",file_geno);
 
   string line;
   char *ch_ptr;
@@ -1852,121 +2100,12 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
   return true;
 }
 
-// Compact version of the above function, using uchar instead of
-// gsl_matrix.
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp,
-                   vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                   const bool calc_K, const size_t ni_test,
-                   const size_t ns_test) {
-  debug_msg("entered");
-  igzstream infile(file_geno.c_str(), igzstream::in);
-  if (!infile) {
-    cout << "error reading genotype file:" << file_geno << endl;
-    return false;
-  }
-
-  Xt.clear();
-  vector<unsigned char> Xt_row;
-  for (size_t i = 0; i < ni_test; i++) {
-    Xt_row.push_back(0);
-  }
-
-  string line;
-  char *ch_ptr;
-
-  if (calc_K == true) {
-    gsl_matrix_set_zero(K);
-  }
-
-  gsl_vector *genotype = gsl_vector_safe_alloc(ni_test);
-  gsl_vector *genotype_miss = gsl_vector_safe_alloc(ni_test);
-  double geno, geno_mean;
-  size_t n_miss;
-
-  size_t ni_total = indicator_idv.size();
-  size_t ns_total = indicator_snp.size();
-
-  size_t c_idv = 0, c_snp = 0;
-
-  auto infilen = file_geno.c_str();
-  for (size_t i = 0; i < ns_total; ++i) {
-    safeGetline(infile, line).eof();
-    if (indicator_snp[i] == 0) {
-      continue;
-    }
-
-    ch_ptr = strtok_safe2((char *)line.c_str(), " ,\t",infilen);
-    ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
-    ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
-
-    c_idv = 0;
-    geno_mean = 0;
-    n_miss = 0;
-    gsl_vector_set_zero(genotype_miss);
-    for (uint j = 0; j < ni_total; ++j) {
-      ch_ptr = strtok_safe2(NULL, " ,\t",infilen);
-      if (indicator_idv[j] == 0) {
-        continue;
-      }
-
-      if (strcmp(ch_ptr, "NA") == 0) {
-        gsl_vector_set(genotype_miss, c_idv, 1);
-        n_miss++;
-      } else {
-        geno = atof(ch_ptr);
-        gsl_vector_set(genotype, c_idv, geno);
-        geno_mean += geno;
-      }
-      c_idv++;
-    }
-
-    geno_mean /= (double)(ni_test - n_miss);
-
-    for (size_t j = 0; j < genotype->size; ++j) {
-      if (gsl_vector_get(genotype_miss, j) == 1) {
-        geno = geno_mean;
-      } else {
-        geno = gsl_vector_get(genotype, j);
-      }
-
-      Xt_row[j] = Double02ToUchar(geno);
-      gsl_vector_set(genotype, j, (geno - geno_mean));
-    }
-    Xt.push_back(Xt_row);
-
-    if (calc_K == true) {
-      gsl_blas_dsyr(CblasUpper, 1.0, genotype, K);
-    }
-
-    c_snp++;
-  }
-
-  if (calc_K == true) {
-    gsl_matrix_scale(K, 1.0 / (double)ns_test);
-
-    for (size_t i = 0; i < genotype->size; ++i) {
-      for (size_t j = 0; j < i; ++j) {
-        geno = gsl_matrix_get(K, j, i);
-        gsl_matrix_set(K, i, j, geno);
-      }
-    }
-  }
-
-  gsl_vector_free(genotype);
-  gsl_vector_free(genotype_miss);
-
-  infile.clear();
-  infile.close();
-
-  return true;
-}
-
 // Read bimbam mean genotype file, the second time, recode "mean"
 // genotype and calculate K.
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                   const bool calc_K) {
+  checkpoint("read-file-bed",file_bed);
   debug_msg("entered");
   ifstream infile(file_bed.c_str(), ios::binary);
   if (!infile) {
@@ -2100,6 +2239,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
                   gsl_matrix *K, const bool calc_K, const size_t ni_test,
                   const size_t ns_test) {
+  checkpoint("read-file-bed",file_bed);
   debug_msg("entered");
   ifstream infile(file_bed.c_str(), ios::binary);
   if (!infile) {
@@ -2236,6 +2376,7 @@ bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
 
 bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
                   map<string, double> &mapRS2est) {
+  checkpoint("read-file-est",file_est);
   debug_msg("entered");
   mapRS2est.clear();
 
@@ -2303,7 +2444,8 @@ bool ReadFile_est(const string &file_est, const vector<size_t> &est_column,
 }
 
 bool CountFileLines(const string &file_input, size_t &n_lines) {
-  debug_msg("entered");
+  checkpoint("count-lines",file_input);
+
   igzstream infile(file_input.c_str(), igzstream::in);
   if (!infile) {
     cout << "error! fail to open file: " << file_input << endl;
@@ -2320,6 +2462,7 @@ bool CountFileLines(const string &file_input, size_t &n_lines) {
 // Read gene expression file.
 bool ReadFile_gene(const string &file_gene, vector<double> &vec_read,
                    vector<SNPINFO> &snpInfo, size_t &ng_total) {
+  checkpoint("read-file-gene",file_gene);
   debug_msg("entered");
   vec_read.clear();
   ng_total = 0;
diff --git a/src/gemma_io.h b/src/gemma_io.h
index dd1d5c0..06cd0ee 100644
--- a/src/gemma_io.h
+++ b/src/gemma_io.h
@@ -26,12 +26,17 @@
 #include <algorithm>
 #include <map>
 #include <vector>
+#include <cstdint>
 
 #include "gzstream.h"
 #include "param.h"
 
 #define tab(col) ( col ? "\t" : "")
 
+constexpr uint8_t CHR_X = 'X';
+constexpr uint8_t CHR_Y = 'Y';
+constexpr uint8_t CHR_M = 'M';
+
 using namespace std;
 
 void ProgressBar(string str, double p, double total, double ratio = -1.0);
@@ -59,8 +64,8 @@ bool ReadFile_pheno(const string &file_pheno,
 bool ReadFile_column(const string &file_pheno, vector<int> &indicator_idv,
                      vector<double> &pheno, const int &p_column);
 
-bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
-                   const gsl_matrix *W, vector<int> &indicator_idv,
+bool ReadFile_bimbam_geno(const string &file_geno, const set<string> &setSnps,
+                   const gsl_matrix *W, const vector<int> &indicator_idv,
                    vector<int> &indicator_snp, const double &maf_level,
                    const double &miss_level, const double &hwe_level,
                    const double &r2_level, map<string, string> &mapRS2chr,
@@ -87,6 +92,8 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
 void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
 void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
 
+gsl_matrix *mdb_calc_kin(const string file_geno, const int k_mode, const string loco);
+
 bool BimbamKin(const string file_geno, const set<string> ksnps,
                vector<int> &indicator_snp, const int k_mode,
                const int display_pace, gsl_matrix *matrix_kin,
@@ -100,11 +107,6 @@ bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                   const bool calc_K);
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
-                   vector<int> &indicator_snp,
-                   vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                   const bool calc_K, const size_t ni_test,
-                   const size_t ns_test);
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
                   vector<int> &indicator_snp, vector<vector<unsigned char>> &Xt,
                   gsl_matrix *K, const bool calc_K, const size_t ni_test,
diff --git a/src/guile/examples.scm b/src/guile/examples.scm
new file mode 100644
index 0000000..8855a34
--- /dev/null
+++ b/src/guile/examples.scm
@@ -0,0 +1,6 @@
+(define-module (example)
+  #:use-module (srfi srfi-4)
+  #:export (make-floats))
+
+(define (make-floats)
+  #f64( 1.0 2.2 3.3 4.0))
diff --git a/src/guile_api.cpp b/src/guile_api.cpp
new file mode 100644
index 0000000..e3ef482
--- /dev/null
+++ b/src/guile_api.cpp
@@ -0,0 +1,44 @@
+#include <guile_api.h>
+
+namespace gemmaguile {
+
+    void global_start_guile() {
+        scm_init_guile();
+    }
+
+    string global_guile_version() {
+        SCM version_scm = scm_version();
+        char* c_str = scm_to_utf8_string(version_scm);
+        string version_str(c_str);
+        free(c_str);  // Must free the allocated memory
+
+        return version_str;
+    }
+
+    SCM make_floats()
+    {
+        return scm_call_0(scm_c_public_ref("example", "make-floats"));
+    }
+
+    void use_floats()
+    {
+        SCM vec = make_floats();
+
+        scm_t_array_handle handle;
+        size_t len;
+        ssize_t inc;
+        const SCM *elt;
+
+        const double *ny = scm_f64vector_elements (vec,&handle,&len,&inc);
+        for (size_t i = 0; i < len; i++) {
+            printf("elem %zu = %f\n", i, ny[i]);
+        }
+        scm_array_handle_release (&handle);
+    }
+
+    void start_test() {
+        scm_c_primitive_load("src/guile/examples.scm");
+        use_floats();
+    }
+
+}
diff --git a/src/guile_api.h b/src/guile_api.h
new file mode 100644
index 0000000..b413342
--- /dev/null
+++ b/src/guile_api.h
@@ -0,0 +1,14 @@
+#pragma once
+
+#include <string>
+#include <libguile.h>
+
+using namespace std;
+
+namespace gemmaguile {
+
+    void global_start_guile();
+    string global_guile_version();
+    void start_test();
+
+};
diff --git a/src/lapack.cpp b/src/lapack.cpp
index e1a63e1..2d10b18 100644
--- a/src/lapack.cpp
+++ b/src/lapack.cpp
@@ -196,7 +196,7 @@ void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
 
     // disable fast NaN checking for now - dsyevr throws NaN errors,
     // but fixes them (apparently)
-    if (is_check_mode()) disable_segfpe();
+    // if (is_check_mode()) disable_segfpe();
 
     // DSYEVR - computes selected eigenvalues and, optionally,
     // eigenvectors of a real symmetric matrix
@@ -223,7 +223,7 @@ void lapack_eigen_symmv(gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec,
     if (INFO != 0) cerr << "ERROR: value of INFO is " << INFO;
     enforce_msg(INFO == 0, "lapack_eigen_symmv failed");
 
-    if (is_check_mode()) enable_segfpe(); // reinstate fast NaN checking
+    // if (is_check_mode()) enable_segfpe(); // reinstate fast NaN checking
 
     gsl_matrix_transpose(evec);
 
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 85e92fe..2b730f3 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -2,7 +2,7 @@
     Genome-wide Efficient Mixed Model Association (GEMMA)
     Copyright © 2011-2017, Xiang Zhou
     Copyright © 2017, Peter Carbonetto
-    Copyright © 2017-2022 Pjotr Prins
+    Copyright © 2017-2025 Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -40,10 +40,14 @@
 #include "gsl/gsl_min.h"
 #include "gsl/gsl_roots.h"
 #include "gsl/gsl_vector.h"
+#include <lmdb++.h>
+// #include <lmdb.h>
+#include <sys/mman.h>
 
 #include "gzstream.h"
 #include "gemma.h"
 #include "gemma_io.h"
+#include "checkpoint.h"
 #include "fastblas.h"
 #include "lapack.h"
 #include "lmm.h"
@@ -54,6 +58,7 @@
 using namespace std;
 
 void LMM::CopyFromParam(PARAM &cPar) {
+  checkpoint_nofile("lmm-copy-from-param");
   a_mode = cPar.a_mode;
   d_pace = cPar.d_pace;
 
@@ -90,10 +95,12 @@ void LMM::CopyFromParam(PARAM &cPar) {
 }
 
 void LMM::CopyToParam(PARAM &cPar) {
+  checkpoint_nofile("lmm-copy-to-param");
   cPar.time_UtX = time_UtX;
   cPar.time_opt = time_opt;
-
-  cPar.ng_test = ng_test;
+  cPar.ns_total = ns_total;
+  cPar.ns_test  = ns_test;
+  cPar.ng_test  = ng_test; // number of markers tested
 
   return;
 }
@@ -104,10 +111,11 @@ void LMM::WriteFiles() {
 
   file_str = path_out + "/" + file_out;
   file_str += ".assoc.txt";
+  checkpoint("lmm-write-files",file_str);
 
   ofstream outfile(file_str.c_str(), ofstream::out);
   if (!outfile) {
-    cout << "error writing file: " << file_str.c_str() << endl;
+    cout << "error writing file: " << file_str << endl;
     return;
   }
 
@@ -147,7 +155,7 @@ void LMM::WriteFiles() {
     outfile << scientific << setprecision(6);
 
     if (a_mode != M_LMM2) {
-      outfile << st.beta << "\t";
+      outfile << scientific << st.beta << "\t";
       outfile << st.se << "\t";
     }
 
@@ -201,21 +209,29 @@ void LMM::WriteFiles() {
 
     common_header();
 
-    size_t t = 0;
-    for (size_t i = 0; i < snpInfo.size(); ++i) {
-      if (indicator_snp[i] == 0)
-        continue;
-      auto snp = snpInfo[i].rs_number;
-      if (process_gwasnps && setGWASnps.count(snp) == 0)
-        continue;
-      // cout << t << endl;
-      outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
-              << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
-              << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t"
-              << fixed << setprecision(3) << snpInfo[i].maf << "\t";
-
-      sumstats(sumStat[t]);
-      t++;
+    if (snpInfo.size()) {
+      size_t t = 0;
+      for (size_t i = 0; i < snpInfo.size(); ++i) {
+        if (indicator_snp[i] == 0)
+          continue;
+        auto snp = snpInfo[i].rs_number;
+        if (process_gwasnps && setGWASnps.count(snp) == 0)
+          continue;
+        // cout << t << endl;
+        outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
+                << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
+                << snpInfo[i].a_minor << "\t" << snpInfo[i].a_major << "\t"
+                << fixed << setprecision(3) << snpInfo[i].maf << "\t";
+
+        sumstats(sumStat[t]);
+        t++;
+      }
+    }
+    else
+    {
+      for (auto &s : sumStat) {
+        sumstats(s);
+      }
     }
   }
 
@@ -1210,6 +1226,142 @@ void LMM::CalcRLScore(const double l, const FUNC_PARAM &params, double &beta,
   gsl_vector_safe_free(v_temp);
 }
 
+/*
+  ## What `CalcUab` does:
+
+`CalcUab` **computes element-wise products of transformed covariates and phenotypes** to create sufficient statistics needed for linear mixed model calculations.
+
+### Purpose:
+
+Creates a matrix `Uab` containing all pairwise element-wise products of:
+- Transformed covariates: columns of `UtW` (U^T × W)
+- Transformed phenotype: vector `Uty` (U^T × y)
+
+These products are used later to efficiently compute variance components and test statistics without repeatedly accessing the original data.
+
+### Function Signature:
+```cpp
+void CalcUab(const gsl_matrix *UtW,   // U^T × W (transformed covariates)
+             const gsl_vector *Uty,   // U^T × y (transformed phenotype)
+             gsl_matrix *Uab)         // OUTPUT: matrix of products
+```
+
+### Matrix Structure:
+
+Given:
+- **n_cvt** covariates (columns in W)
+- 1 phenotype (y)
+
+The function creates products for indices `a` and `b` where:
+- `a, b ∈ {1, 2, ..., n_cvt, n_cvt+2}` (note: n_cvt+1 is **skipped**)
+- Only computes for `b ≤ a` (lower triangular, symmetric)
+
+**Index mapping:**
+- `1` to `n_cvt`: Covariate columns from UtW
+- `n_cvt+1`: **SKIPPED** (reserved for genotype, added later)
+- `n_cvt+2`: Phenotype (Uty)
+
+### Algorithm Walkthrough:
+
+```cpp
+for (size_t a = 1; a <= n_cvt + 2; ++a) {
+  // Get column 'a'
+  if (a == n_cvt + 2) {
+    u_a = Uty;  // Phenotype
+  } else {
+    u_a = UtW[:, a-1];  // Covariate a
+  }
+
+  for (size_t b = a; b >= 1; --b) {
+    // Get column 'b'
+    if (b == n_cvt + 2) {
+      u_b = Uty;  // Phenotype
+    } else {
+      u_b = UtW[:, b-1];  // Covariate b
+    }
+
+    // Store element-wise product: u_a .* u_b
+    index_ab = GetabIndex(a, b, n_cvt);
+    Uab[:, index_ab] = u_a .* u_b;
+  }
+}
+```
+
+### Example:
+
+With **n_cvt = 2** (2 covariates):
+
+**Columns stored in Uab:**
+```
+GetabIndex(1, 1, 2) → UtW₁ .* UtW₁    (covariate 1 × covariate 1)
+GetabIndex(2, 1, 2) → UtW₂ .* UtW₁    (covariate 2 × covariate 1)
+GetabIndex(2, 2, 2) → UtW₂ .* UtW₂    (covariate 2 × covariate 2)
+GetabIndex(4, 1, 2) → Uty .* UtW₁     (phenotype × covariate 1)
+GetabIndex(4, 2, 2) → Uty .* UtW₂     (phenotype × covariate 2)
+GetabIndex(4, 4, 2) → Uty .* Uty      (phenotype × phenotype)
+
+Note: Index 3 (n_cvt+1) is skipped - reserved for genotype
+```
+
+### Why Skip `n_cvt+1`?
+
+```cpp
+if (a == n_cvt + 1) continue;
+if (b == n_cvt + 1) continue;
+```
+
+The slot `n_cvt+1` is **reserved for the genotype (SNP)** being tested, which is added by the overloaded version:
+
+```cpp
+void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty,
+             const gsl_vector *Utx,  // ← GENOTYPE
+             gsl_matrix *Uab);
+```
+
+This design allows the base covariate products to be computed once, then genotype-specific products added for each SNP.
+
+### Index Function:
+
+```cpp
+size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt)
+```
+
+Maps `(a, b)` pairs to column indices in `Uab`, ensuring:
+- Only lower triangular entries (b ≤ a)
+- Skips the genotype position (n_cvt+1)
+
+### Matrix Dimensions:
+
+```cpp
+size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+```
+
+Number of unique products in the symmetric matrix (including diagonal), accounting for the skipped genotype position.
+
+### In Context (from `batch_compute`):
+
+```cpp
+// Pre-compute covariate products once
+CalcUab(UtW, Uty, Uab);
+
+// For each SNP:
+for (size_t i = 0; i < batch_size; i++) {
+  gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+  gsl_vector_safe_memcpy(Utx, &UtXlarge_col.vector);
+
+  // Add genotype-specific products
+  CalcUab(UtW, Uty, Utx, Uab);  // Overloaded version
+
+  // Use Uab for likelihood calculations
+  CalcLambda(..., Uab, ...);
+}
+```
+
+### Summary:
+
+**`CalcUab` pre-computes all pairwise element-wise products** of transformed covariates and phenotype (UtW columns and Uty). These products are sufficient statistics used repeatedly in likelihood calculations for different SNPs, avoiding redundant computation. The design reserves space for genotype products to be added later per-SNP while reusing the fixed covariate products.
+*/
+
 void CalcUab(const gsl_matrix *UtW, const gsl_vector *Uty, gsl_matrix *Uab) {
   size_t index_ab;
   size_t n_cvt = UtW->size2;
@@ -1470,6 +1622,53 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
   return;
 }
 
+/*
+Looking at the LMM::Analyze function, here are the parameters that get modified:
+Modified (Mutated) Parameters:
+
+None of the function parameters are directly modified.
+
+All parameters are either:
+
+    const gsl_matrix * - pointer to const (read-only)
+    const gsl_vector * - pointer to const (read-only)
+    const set<string> - const set passed by value (copy)
+    std::function<...>& - reference to a function, which is called but not modified itself
+
+What Actually Gets Modified:
+
+The function modifies member variables of the LMM class object:
+
+    sumStat (member variable)
+        Modified: sumStat.push_back(SNPs) is called in the batch_compute lambda
+        Accumulates summary statistics (beta, se, lambda values, p-values) for each SNP that passes filters
+        This is the primary output of the analysis
+    Timing member variables (implied by member access):
+        time_UtX: time_UtX += (clock() - time_start) / ...
+        time_opt: time_opt += (clock() - time_start) / ...
+        These accumulate timing information for performance profiling
+    Member variables read but not modified:
+        indicator_snp - read to determine which SNPs to process
+        indicator_idv - read to determine which individuals to include
+        ni_total - number of total individuals
+        ni_test - number of individuals in test
+        n_cvt - number of covariates
+        l_mle_null, l_min, l_max, n_region, logl_mle_H0 - parameters for likelihood calculations
+        a_mode - analysis mode (M_LMM1, M_LMM2, etc.)
+        d_pace - for progress display
+
+Summary:
+
+From the outside caller's perspective: None of the parameters passed to Analyze() are mutated. All inputs are const.
+
+The actual output is stored in the member variable sumStat, which accumulates one SUMSTAT structure per analyzed SNP containing:
+
+    beta - effect size
+    se - standard error
+    lambda_remle, lambda_mle - variance component estimates
+    p_wald, p_lrt, p_score - p-values from different tests
+    logl_H1 - log-likelihood under alternative hypothesis
+*/
 
 void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
                   const gsl_matrix *U, const gsl_vector *eval,
@@ -1478,12 +1677,9 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
                   const set<string> gwasnps) {
   clock_t time_start = clock();
 
-  write(W, "W");
-  write(y, "y");
+  checkpoint_nofile("start-lmm-analyze");
   // Subset/LOCO support
   bool process_gwasnps = gwasnps.size();
-  if (process_gwasnps)
-    debug_msg("Analyze subset of SNPs (LOCO)");
 
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
@@ -1510,24 +1706,36 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
   // start reading genotypes and analyze
   size_t c = 0;
 
+  /*
+    batch_compute(l) takes l SNPs that have been loaded into Xlarge,
+    transforms them all at once using the eigenvector matrix U, then
+    loops through each transformed SNP to compute association
+    statistics (beta, standard errors, p-values) and stores results in
+    sumStat.
+  */
   auto batch_compute = [&](size_t l) { // using a C++ closure
     // Compute SNPs in batch, note the computations are independent per SNP
     // debug_msg("enter batch_compute");
+    checkpoint_nofile("\nstart-batch_compute");
     gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
     gsl_matrix_view UtXlarge_sub =
         gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
 
     time_start = clock();
+    // Transforms all l SNPs in the batch at once: UtXlarge = U^T × Xlarge
+    // This is much faster than doing l separate matrix-vector products
+    // U is the eigenvector matrix from the spectral decomposition of the kinship matrix
     fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
                    &UtXlarge_sub.matrix);
     time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
 
     gsl_matrix_set_zero(Xlarge);
     for (size_t i = 0; i < l; i++) {
-      // for every batch...
+      // for each snp batch item extract transformed genotype:
       gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
       gsl_vector_safe_memcpy(Utx, &UtXlarge_col.vector);
 
+      // Calculate design matrix components and compute sufficient statistics for the regression model
       CalcUab(UtW, Uty, Utx, Uab);
 
       time_start = clock();
@@ -1537,17 +1745,24 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
       double p_lrt = 0.0, p_score = 0.0;
       double logl_H1 = 0.0;
 
+      // Run statistical tests based on analysis mode
       // 3 is before 1.
       if (a_mode == M_LMM3 || a_mode == M_LMM4 || a_mode == M_LMM9 ) {
         CalcRLScore(l_mle_null, param1, beta, se, p_score);
       }
 
+      // Computes Wald statistic for testing association
       if (a_mode == M_LMM1 || a_mode == M_LMM4) {
         // for univariate a_mode is 1
+        // Estimates variance component (lambda) via REML
         CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
         CalcRLWald(lambda_remle, param1, beta, se, p_wald);
       }
 
+      // Estimates variance component (lambda) via REML
+      // Likelihood Ratio Test (modes 2, 4, 9):
+      // Estimates variance component via MLE
+      // Compares log-likelihood under alternative vs null hypothesis
       if (a_mode == M_LMM2 || a_mode == M_LMM4 || a_mode == M_LMM9) {
         CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
         p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
@@ -1561,6 +1776,7 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
       sumStat.push_back(SNPs);
     }
     // debug_msg("exit batch_compute");
+    checkpoint_nofile("end-batch_compute");
   };
 
   const auto num_snps = indicator_snp.size();
@@ -1642,9 +1858,10 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
   }
 
   batch_compute(c % msize);
-  ProgressBar("Reading SNPs", num_snps - 1, num_snps - 1);
+  ProgressBar("Reading SNPS", num_snps - 1, num_snps - 1);
   // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
   cout << endl;
+  checkpoint_nofile("end-lmm-analyze");
 
   gsl_vector_safe_free(x);
   gsl_vector_safe_free(x_miss);
@@ -1657,12 +1874,474 @@ void LMM::Analyze(std::function< SnpNameValues(size_t) >& fetch_snp,
 
 }
 
+/*
+  This is the mirror function of LMM::Analyze and AnalyzeBimbam, but uses mdb input instead.
+ */
+
+
+
+void LMM::mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
+                      const gsl_matrix *U, const gsl_vector *eval,
+                      const gsl_matrix *UtW, const gsl_vector *Uty,
+                      const gsl_matrix *W, const gsl_vector *y,
+                      size_t num_markers) {
+  vector<SUMSTAT2> sumstat2;
+  clock_t time_start = clock();
+
+  checkpoint_nofile("start-lmm-mdb-analyze");
+
+  // Calculate basic quantities.
+  size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
+
+  const size_t inds = U->size1;
+  enforce(inds == ni_test);
+  assert(inds > 0);
+
+  gsl_vector *x = gsl_vector_safe_alloc(inds); // #inds
+  gsl_vector *x_miss = gsl_vector_safe_alloc(inds);
+  assert(ni_test == U->size2);
+  assert(ni_test > 0);
+  assert(ni_total > 0);
+  assert(n_index > 0);
+  gsl_vector *Utx = gsl_vector_safe_alloc(ni_test);
+  gsl_matrix *Uab = gsl_matrix_safe_alloc(ni_test, n_index);
+  gsl_vector *ab = gsl_vector_safe_alloc(n_index);
+
+  const size_t msize = LMM_BATCH_SIZE;
+  gsl_matrix *Xlarge = gsl_matrix_safe_alloc(inds, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_safe_alloc(inds, msize);
+  enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure
+  enforce(Xlarge->size1 == inds);
+  gsl_matrix_set_zero(Xlarge);
+  gsl_matrix_set_zero(Uab);
+  CalcUab(UtW, Uty, Uab);
+
+  // start reading genotypes and analyze
+  size_t c = 0;
+
+  /*
+    batch_compute(l) takes l x SNPs that have been loaded into Xlarge,
+    transforms them all at once using the eigenvector matrix U, then
+    loops through each transformed SNP to compute association
+    statistics (beta, standard errors, p-values) and stores results in
+    sumStat.
+  */
+  auto batch_compute = [&](size_t l, const Markers &markers) { // using a C++ closure
+    // Compute SNPs in batch, note the computations are independent per SNP
+    debug_msg("enter batch_compute");
+    assert(l>0);
+    assert(inds>0);
+    gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
+    gsl_matrix_view UtXlarge_sub =
+        gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
+
+    time_start = clock();
+    // Transforms all l SNPs in the batch at once: UtXlarge = U^T × Xlarge
+    // This is much faster than doing l separate matrix-vector products
+    // U is the eigenvector matrix from the spectral decomposition of the kinship matrix
+    fast_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                   &UtXlarge_sub.matrix);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    gsl_matrix_set_zero(Xlarge);
+    for (size_t i = 0; i < l; i++) {
+      // for each snp batch item extract transformed genotype:
+      gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+      gsl_vector_safe_memcpy(Utx, &UtXlarge_col.vector);
+
+      // Calculate design matrix components and compute sufficient statistics for the regression model
+      CalcUab(UtW, Uty, Utx, Uab);
+
+      time_start = clock();
+      FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+      double lambda_mle = 0.0, lambda_remle = 0.0, beta = 0.0, se = 0.0, p_wald = 0.0;
+      double p_lrt = 0.0, p_score = 0.0;
+      double logl_H1 = 0.0;
+
+      // Run statistical tests based on analysis mode
+      // 3 is before 1.
+      if (a_mode == M_LMM3 || a_mode == M_LMM4 || a_mode == M_LMM9 ) {
+        CalcRLScore(l_mle_null, param1, beta, se, p_score);
+      }
+
+      // Computes Wald statistic for testing association
+      if (a_mode == M_LMM1 || a_mode == M_LMM4) {
+        // for univariate a_mode is 1
+        // Estimates variance component (lambda) via REML
+        CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+        CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+      }
+
+      // Estimates variance component (lambda) via REML
+      // Likelihood Ratio Test (modes 2, 4, 9):
+      // Estimates variance component via MLE
+      // Compares log-likelihood under alternative vs null hypothesis
+      if (a_mode == M_LMM2 || a_mode == M_LMM4 || a_mode == M_LMM9) {
+        CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+        p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+      }
+
+      time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      auto markerinfo = markers[i];
+      // Store summary data.
+      SUMSTAT2 st = {markerinfo, beta, se, lambda_remle, lambda_mle, p_wald, p_lrt, p_score, logl_H1};
+      sumstat2.push_back(st);
+    }
+  };
+
+  /*
+  const auto num_markers = indicator_snp.size();
+  enforce_msg(num_markers > 0,"Zero SNPs to process - data corrupt?");
+  if (num_markers < 50) {
+    cerr << num_markers << " SNPs" << endl;
+    warning_msg("very few SNPs processed");
+  }
+  */
+  const size_t progress_step = (num_markers/50>d_pace ? num_markers/50 : d_pace);
+  Markers markers;
+
+  assert(num_markers > 0);
+  for (size_t t = 0; t < num_markers; ++t) {
+  // for (size_t t = 0; t < 2; ++t) {
+    if (t % progress_step == 0 || t == (num_markers - 1)) {
+      ProgressBar("Reading markers", t, num_markers - 1);
+    }
+    // if (indicator_snp[t] == 0)
+    // continue;
+
+    auto tup = fetch_snp(t); // use the callback
+    auto state = get<0>(tup);
+    if (state == SKIP)
+      continue;
+    if (state == LAST)
+      break; // marker loop because of LOCO
+
+    auto markerinfo = get<1>(tup);
+    auto gs = get<2>(tup);
+
+    markers.push_back(markerinfo);
+
+    // drop missing idv and plug mean values for missing geno
+    double x_total = 0.0; // sum genotype values to compute x_mean
+    uint vpos = 0;        // position in target vector
+    uint n_miss = 0;      // count NA genotypes
+    gsl_vector_set_zero(x_miss);
+    for (size_t i = 0; i < ni_total; ++i) {
+      // get the genotypes per individual and compute stats per SNP
+      if (indicator_idv[i] == 0) // skip individual
+        continue;
+
+      double geno = gs[i];
+      if (isnan(geno)) {
+        gsl_vector_set(x_miss, vpos, 1.0);
+        n_miss++;
+      } else {
+        gsl_vector_set(x, vpos, geno);
+        x_total += geno;
+      }
+      vpos++;
+    }
+    enforce(vpos == ni_test);
+
+    const double x_mean = x_total/(double)(ni_test - n_miss);
+
+    // plug x_mean back into missing values
+    for (size_t i = 0; i < ni_test; ++i) {
+      if (gsl_vector_get(x_miss, i) == 1.0) {
+        gsl_vector_set(x, i, x_mean);
+      }
+    }
+
+    enforce(x->size == ni_test);
+
+    // copy genotype values for SNP into Xlarge cache
+    gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
+    gsl_vector_safe_memcpy(&Xlarge_col.vector, x);
+    c++; // count markers going in
+
+    if (c % msize == 0) {
+      batch_compute(msize,markers);
+      markers.clear();
+      markers.reserve(msize);
+    }
+  }
+
+  if (c % msize)
+    batch_compute(c % msize,markers);
+  ProgressBar("Reading markers", num_markers - 1, num_markers - 1);
+  cout << endl;
+  cout << "Counted markers " << c << " sumStat " << sumstat2.size() << endl;
+  checkpoint_nofile("end-lmm-mdb-analyze");
+
+  string file_str;
+  debug_msg("LMM::WriteFiles");
+  file_str = path_out + "/" + file_out;
+  file_str += ".assoc.txt";
+  checkpoint("lmm-write-files",file_str);
+
+  ofstream outfile(file_str.c_str(), ofstream::out);
+  if (!outfile) {
+    cout << "error writing file: " << file_str << endl;
+    return;
+  }
+
+  auto sumstats = [&] (SUMSTAT2 st) {
+    outfile << scientific << setprecision(6);
+    auto m = st.markerinfo;
+    auto name = m.name;
+    auto chr  = m.chr;
+    string chr_s;
+    if (chr == CHR_X)
+      chr_s = "X";
+    else if (chr == CHR_Y)
+      chr_s = "Y";
+    else if (chr == CHR_M)
+      chr_s = "M";
+    else
+      chr_s = to_string(chr);
+
+    outfile << chr_s << "\t";
+    outfile << name << "\t";
+    outfile << m.pos << "\t";
+    outfile << m.n_miss << "\t-\t-\t"; // n_miss column + allele1 + allele0
+    outfile << fixed << setprecision(3) << m.maf << "\t";
+    outfile << scientific << setprecision(6);
+    if (a_mode != M_LMM2) {
+      outfile << st.beta << "\t";
+      outfile << st.se << "\t";
+    }
+
+    if (a_mode != M_LMM3 && a_mode != M_LMM9)
+      outfile << st.logl_H1 << "\t";
+
+    switch(a_mode) {
+    case M_LMM1:
+      outfile << st.lambda_remle << "\t"
+              << st.p_wald << endl;
+      break;
+    case M_LMM2:
+    case M_LMM9:
+      outfile << st.lambda_mle << "\t"
+              << st.p_lrt << endl;
+      break;
+    case M_LMM3:
+      outfile << st.p_score << endl;
+      break;
+    case M_LMM4:
+      outfile << st.lambda_remle << "\t"
+              << st.lambda_mle << "\t"
+              << st.p_wald << "\t"
+              << st.p_lrt << "\t"
+              << st.p_score << endl;
+      break;
+    }
+  };
+
+  for (auto &s : sumstat2) {
+    sumstats(s);
+  }
+
+  gsl_vector_safe_free(x);
+  gsl_vector_safe_free(x_miss);
+  gsl_vector_safe_free(Utx);
+  gsl_matrix_safe_free(Uab);
+  gsl_vector_free(ab); // unused
+
+  gsl_matrix_safe_free(Xlarge);
+  gsl_matrix_safe_free(UtXlarge);
+
+}
+
+
+void LMM::mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
+                          const gsl_matrix *UtW, const gsl_vector *Uty,
+                       const gsl_matrix *W, const gsl_vector *y, const string loco) {
+  checkpoint("mdb-calc-gwa",file_geno);
+  bool is_loco = !loco.empty();
+
+  // Convert loco string to what we use in the chrpos index
+  uint8_t loco_chr;
+  if (is_loco) {
+    if (loco == "X") {
+      loco_chr = CHR_X;
+    } else if (loco == "Y") {
+      loco_chr = CHR_Y;
+    } else if (loco == "M") {
+      loco_chr = CHR_M;
+    } else {
+      try {
+        loco_chr = static_cast<uint8_t>(stoi(loco));
+      } catch (...) {
+        loco_chr = 0;
+      }
+    }
+  }
+
+  auto env = lmdb::env::create();
+  env.set_mapsize(1UL * 1024UL * 1024UL * 1024UL * 1024UL);
+  env.set_max_dbs(10);
+  env.open(file_geno.c_str(), MDB_RDONLY | MDB_NOSUBDIR, 0664);
+  // Get mmap info using lmdb++ wrapper
+  MDB_envinfo info;
+  mdb_env_info(env.handle(), &info);
+  // Linux kernel aggressive readahead hints
+  madvise(info.me_mapaddr, info.me_mapsize, MADV_SEQUENTIAL);
+  madvise(info.me_mapaddr, info.me_mapsize, MADV_WILLNEED);
+
+  std::cout << "## LMDB opened with optimized readahead; map size = " << (info.me_mapsize / 1024 / 1024) << " MB" << std::endl;
+
+  auto rtxn = lmdb::txn::begin(env, nullptr, MDB_RDONLY);
+  auto geno_mdb = lmdb::dbi::open(rtxn, "geno");
+
+  auto marker_mdb = lmdb::dbi::open(rtxn, "marker");
+  auto info_mdb = lmdb::dbi::open(rtxn, "info");
+  string_view info_key,info_value;
+  info_mdb.get(rtxn,"format",info_value);
+  auto format = string(info_value);
+
+  MDB_stat stat;
+  mdb_stat(rtxn, geno_mdb, &stat);
+  auto num_markers = stat.ms_entries;
+
+  auto mdb_fetch = MDB_FIRST;
+
+  auto cursor = lmdb::cursor::open(rtxn, geno_mdb);
+  cout << "## number of total individuals = " << ni_total << endl;
+  cout << "## number of analyzed individuals = " << ni_test << endl;
+  cout << "## number of analyzed SNPs/var = " << num_markers << endl;
+
+  std::function<SnpNameValues2(size_t)>  fetch_snp = [&](size_t num) {
+
+    string_view key,value;
+
+    auto mdb_success = cursor.get(key, value, mdb_fetch);
+    mdb_fetch = MDB_NEXT;
+
+    // uint8_t chr;
+    vector<double> gs;
+    MarkerInfo markerinfo;
+
+    if (mdb_success) {
+      size_t size = 0;
+      // ---- Depending on the format we get different buffers - currently float and byte are supported:
+      if (format == "Gb") {
+        size_t num_bytes = value.size() / sizeof(uint8_t);
+        assert(num_bytes == ni_total);
+        size = num_bytes;
+        const uint8_t* gs_bbuf = reinterpret_cast<const uint8_t*>(value.data());
+        gs.reserve(size);
+        for (size_t i = 0; i < size; ++i) {
+          double g = static_cast<double>(gs_bbuf[i])/127.0;
+          gs.push_back(g);
+        }
+      } else {
+        size_t num_floats = value.size() / sizeof(float);
+        assert(num_floats == ni_total);
+        size = num_floats;
+        const float* gs_fbuf = reinterpret_cast<const float*>(value.data());
+        gs.reserve(size);
+        for (size_t i = 0; i < size; ++i) {
+          gs.push_back(static_cast<double>(gs_fbuf[i]));
+        }
+      }
+      // Start unpacking key chr+pos
+      if (key.size() != 10) {
+        cerr << "key.size=" << key.size() << endl;
+        throw std::runtime_error("Invalid key size");
+      }
+      // "S>L>L>"
+      const uint8_t* data = reinterpret_cast<const uint8_t*>(key.data());
+      auto chr = static_cast<uint8_t>(data[1]);
+      // Extract big-endian uint32
+      // uint32_t rest = static_cast<uint32_t>(data[2]);
+      uint32_t pos =  (data[2] << 24) | (data[3] << 16) |
+        (data[4] << 8) | data[5];
+
+      uint32_t num = (data[6] << 24) | (data[7] << 16) |
+        (data[8] << 8) | data[9];
+
+      // printf("%#02x %#02x\n", chr, loco_chr);
+
+      if (is_loco && loco_chr != chr) {
+        if (chr > loco_chr)
+            return make_tuple(LAST, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs);
+          else
+            return make_tuple(SKIP, MarkerInfo { .name="", .chr=chr, .pos=pos } , gs);
+      }
+
+      string_view value2;
+      marker_mdb.get(rtxn,key,value2);
+      auto marker = string(value2);
+      // 1       rs13476251      174792257
+      // cout << static_cast<int>(chr) << ":" << pos2 << " line " << rest2 << ":" << marker << endl ;
+
+      // compute maf and n_miss (NAs)
+      size_t n_miss = 0; // count NAs: FIXME
+      double maf = compute_maf(ni_total, ni_test, n_miss, gs.data(), indicator_idv);
+
+      markerinfo = MarkerInfo { .name=marker,.chr=chr,.pos=pos,.line_no=num,.n_miss=n_miss,.maf=maf };
+
+      // cout << "!!!!" << size << marker << ": af" << maf << " " << gs[0] << "," << gs[1] << "," << gs[2] << "," << gs[3] << endl;
+    }
+    return make_tuple(COMPUTE, markerinfo, gs);
+  };
+  LMM::mdb_analyze(fetch_snp,U,eval,UtW,Uty,W,y,num_markers);
+
+  ns_total = ns_test = num_markers; // track global number of snps in original gemma (goes to cPar)
+}
+
+
+/*
+
+Looking at the `LMM::AnalyzeBimbam` function, here are the parameters that get modified:
+
+## Modified (Mutated) Parameters:
+
+**None of the parameters are modified.**
+
+All parameters are passed as:
+- `const gsl_matrix *` - pointer to const matrix (read-only)
+- `const gsl_vector *` - pointer to const vector (read-only)
+- `const set<string>` - const set passed by value (copy)
+
+## What Actually Gets Modified:
+
+The function modifies **internal state** and **local variables** only:
+
+1. **Local variables modified**:
+   - `prev_line` - tracks current line position in file
+   - `gs` - vector that gets resized and reassigned for each SNP
+   - `infile` - file stream that gets opened, read from, and closed
+
+2. **Member variables** (implied by `this->`):
+   - `file_geno` - used to open the file (read-only here)
+   - `ni_total` - number of individuals (read-only here)
+
+3. **The lambda function `fetch_snp`** captures local variables by reference (`[&]`) and modifies:
+   - `prev_line` - incremented as lines are read
+   - `gs` - reassigned with new genotype values for each SNP
+   - `infile` - advanced through the file
+
+## Summary:
+
+**From the outside caller's perspective**: Nothing passed into `AnalyzeBimbam` gets mutated. All inputs are treated as read-only (`const`).
+
+The function's work happens through:
+- Reading from the genotype file
+- Calling `LMM::Analyze(fetch_snp, ...)` with a callback that reads SNP data
+- Any mutations likely happen inside the `LMM::Analyze` method (which would need to be examined separately to see what it modifies)
+
+*/
+
 void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
                         const gsl_matrix *UtW, const gsl_vector *Uty,
                         const gsl_matrix *W, const gsl_vector *y,
                         const set<string> gwasnps) {
   debug_msg(file_geno);
   auto infilen = file_geno.c_str();
+  checkpoint("start-read-geno-file",file_geno);
 
   igzstream infile(infilen, igzstream::in);
   enforce_msg(infile, "error reading genotype file");
@@ -1942,6 +2621,153 @@ void MatrixCalcLR(const gsl_matrix *U, const gsl_matrix *UtX,
   return;
 }
 
+/*
+
+## What `CalcLambda` does:
+
+`CalcLambda` **estimates the variance component ratio (lambda)** in a linear mixed model by finding the value that maximizes either the log-likelihood or log-restricted likelihood function.
+
+### Purpose:
+
+In LMM, the variance structure is:
+- **Var(y) = λτσ² K + τσ² I**
+
+Where:
+- **λ (lambda)**: The ratio of genetic variance to environmental variance (Vg/Ve)
+- **K**: Kinship/relatedness matrix
+- **I**: Identity matrix
+
+Lambda determines how much of the phenotypic variance is explained by genetic relatedness vs random environmental effects.
+
+### Function Signature:
+```cpp
+void CalcLambda(const char func_name,      // 'R' for REML, 'L' for MLE
+                FUNC_PARAM &params,        // Model parameters
+                const double l_min,        // Min lambda to search
+                const double l_max,        // Max lambda to search
+                const size_t n_region,     // # intervals to divide search
+                double &lambda,            // OUTPUT: estimated lambda
+                double &logf)              // OUTPUT: log-likelihood at lambda
+```
+
+### Algorithm Overview:
+
+#### **Phase 1: Identify Candidate Intervals**
+```cpp
+for (size_t i = 0; i < n_region; ++i) {
+  lambda_l = l_min * exp(lambda_interval * i);
+  lambda_h = l_min * exp(lambda_interval * (i + 1.0));
+
+  dev1_l = LogRL_dev1(lambda_l, &params);  // First derivative at lower bound
+  dev1_h = LogRL_dev1(lambda_h, &params);  // First derivative at upper bound
+
+  if (dev1_l * dev1_h <= 0) {
+    lambda_lh.push_back(make_pair(lambda_l, lambda_h));  // Sign change = root
+  }
+}
+```
+- Divides the search range [l_min, l_max] into `n_region` intervals (log-spaced)
+- Evaluates the **first derivative** at interval boundaries
+- If the derivative changes sign, there's a local maximum in that interval
+
+#### **Phase 2: Find Maximum**
+
+**Case A: No sign changes found**
+```cpp
+if (lambda_lh.empty()) {
+  // Maximum is at a boundary
+  if (logf_l >= logf_h) {
+    lambda = l_min;
+  } else {
+    lambda = l_max;
+  }
+}
+```
+
+**Case B: Sign changes found (interior maximum)**
+
+For each candidate interval:
+
+1. **Brent's Method** (robust root-finding):
+   ```cpp
+   gsl_root_fsolver_brent
+   ```
+   - Finds where the first derivative = 0 (critical point)
+   - Doesn't require computing second derivatives
+   - Robust but slower
+
+2. **Newton-Raphson Method** (refinement):
+   ```cpp
+   gsl_root_fdfsolver_newton
+   ```
+   - Uses both first and second derivatives
+   - Refines the estimate from Brent's method
+   - Faster convergence near the root
+
+3. **Compare across intervals**:
+   ```cpp
+   if (logf < logf_l) {
+     logf = logf_l;
+     lambda = l;
+   }
+   ```
+   - Keeps track of the lambda with the highest log-likelihood
+   - Handles multiple local maxima
+
+#### **Phase 3: Check Boundaries**
+```cpp
+if (logf_l > logf) {
+  lambda = l_min;
+}
+if (logf_h > logf) {
+  lambda = l_max;
+}
+```
+- Ensures the global maximum isn't at a boundary
+
+### Function Types:
+
+- **`func_name = 'R'` or `'r'`**: REML (Restricted Maximum Likelihood)
+  - Uses `LogRL_f`, `LogRL_dev1`, `LogRL_dev2`
+  - Accounts for loss of degrees of freedom from fixed effects
+  - Preferred for variance component estimation
+
+- **`func_name = 'L'` or `'l'`**: MLE (Maximum Likelihood)
+  - Uses `LogL_f`, `LogL_dev1`, `LogL_dev2`
+  - Direct likelihood maximization
+  - Used for likelihood ratio tests
+
+### Error Handling:
+
+```cpp
+if (status == GSL_CONTINUE || status != GSL_SUCCESS) {
+  logf = nan("NAN");
+  lambda = nan("NAN");
+  return;
+}
+```
+- If optimization fails, returns NaN values
+- Warnings issued for non-convergence
+
+### In Context (from `batch_compute`):
+
+```cpp
+// REML for Wald test
+CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+
+// MLE for LRT test
+CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+```
+
+### Summary:
+
+**`CalcLambda` finds the optimal variance component ratio (λ)** that maximizes the (restricted) log-likelihood for a given SNP's genotype data in a linear mixed model. This λ value quantifies the relative contribution of genetic relatedness vs environmental noise to phenotypic variation, and is essential for computing association test statistics (Wald, LRT) while properly accounting for population structure and relatedness.
+
+*/
+
+
 void CalcLambda(const char func_name, FUNC_PARAM &params, const double l_min,
                 const double l_max, const size_t n_region, double &lambda,
                 double &logf) {
@@ -2291,6 +3117,7 @@ void LMM::AnalyzeBimbamGXE(const gsl_matrix *U, const gsl_vector *eval,
     cout << "error reading genotype file:" << file_geno << endl;
     return;
   }
+  checkpoint("start-read-geno-file",file_geno);
 
   clock_t time_start = clock();
 
diff --git a/src/lmm.h b/src/lmm.h
index 736c679..295602a 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -2,7 +2,7 @@
     Genome-wide Efficient Mixed Model Association (GEMMA)
     Copyright © 2011-2017, Xiang Zhou
     Copyright © 2017, Peter Carbonetto
-    Copyright © 2017, Pjotr Prins
+    Copyright © 2017-2025, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -30,7 +30,7 @@
 
 using namespace std;
 
-#define LMM_BATCH_SIZE 20000 // used for batch processing
+#define LMM_BATCH_SIZE 5000 // used for batch processing
 
 class FUNC_PARAM {
 
@@ -44,7 +44,42 @@ public:
   size_t e_mode;
 };
 
-typedef std::tuple<string,std::vector<double> > SnpNameValues;
+
+// typedef tuple< string, uint16_t, uint32_t, uint32_t > MarkerInfo; // name, chr, pos, line
+struct MarkerInfo {
+  string name;
+  uint8_t chr;
+  size_t pos, line_no, n_miss;
+  double maf;
+} ;
+
+typedef vector<MarkerInfo> Markers;
+typedef tuple< string,vector<double> > SnpNameValues;
+
+enum MarkerState {
+  FAIL,
+  COMPUTE,
+  SKIP,
+  LAST
+};
+
+typedef tuple< MarkerState,MarkerInfo,vector<double> > SnpNameValues2; // success, markerinfo (maf and n_miss are computed)
+// Results for LMM.
+class SUMSTAT2 {
+public:
+  MarkerInfo markerinfo;
+  double beta;         // REML estimator for beta.
+  double se;           // SE for beta.
+  double lambda_remle; // REML estimator for lambda.
+  double lambda_mle;   // MLE estimator for lambda.
+  double p_wald;       // p value from a Wald test.
+  double p_lrt;        // p value from a likelihood ratio test.
+  double p_score;      // p value from a score test.
+  double logl_H1;      // log likelihood under the alternative
+                       // hypothesis as a measure of goodness of fit,
+                       // see https://github.com/genetics-statistics/GEMMA/issues/81
+};
+
 
 class LMM {
 
@@ -100,6 +135,14 @@ public:
                const gsl_matrix *UtW, const gsl_vector *Uty,
                const gsl_matrix *W, const gsl_vector *y,
                const set<string> gwasnps);
+  void mdb_analyze(std::function< SnpNameValues2(size_t) >& fetch_snp,
+                   const gsl_matrix *U, const gsl_vector *eval,
+                   const gsl_matrix *UtW, const gsl_vector *Uty,
+                   const gsl_matrix *W, const gsl_vector *y, size_t num_markers);
+  void mdb_calc_gwa(const gsl_matrix *U, const gsl_vector *eval,
+                    const gsl_matrix *UtW, const gsl_vector *Uty,
+                    const gsl_matrix *W, const gsl_vector *y,
+                    const string loco);
   void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
                      const gsl_matrix *UtW, const gsl_vector *Uty,
                      const gsl_matrix *W, const gsl_vector *y,
diff --git a/src/main.cpp b/src/main.cpp
index deadc63..ee6d597 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,6 +22,7 @@
 #include <sstream>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <guile_api.h>
 
 using namespace std;
 
@@ -31,6 +32,9 @@ int main(int argc, char *argv[]) {
 
   gsl_set_error_handler (&gemma_gsl_error_handler);
 
+  gemmaguile::global_start_guile();
+  gemmaguile::start_test();
+
   if (argc <= 1) {
     cGemma.PrintHeader();
     cGemma.PrintHelp(0);
diff --git a/src/mathfunc.cpp b/src/mathfunc.cpp
index 68d17a7..c82872e 100644
--- a/src/mathfunc.cpp
+++ b/src/mathfunc.cpp
@@ -494,6 +494,20 @@ void StandardizeVector(gsl_vector *y) {
 }
 
 // Calculate UtX (U gets transposed)
+/*
+CalcUtX transforms a genotype matrix by the eigenvector matrix from the kinship matrix decomposition.
+Purpose:
+
+Computes UtX = U^T × X, where:
+
+    U: Eigenvector matrix from the spectral decomposition of the kinship matrix K
+    X: Genotype matrix (individuals × SNPs)
+    UtX: Transformed genotype matrix
+
+This transformation rotates the genotype data into the eigenspace where the kinship-induced correlations become diagonal, simplifying likelihood calculations.
+
+CalcUtX transforms a genotype matrix into the eigenspace by computing U^T × X, where U contains the eigenvectors from the kinship matrix decomposition. This transformation diagonalizes the correlation structure induced by relatedness, making subsequent likelihood calculations tractable and efficient. The function overwrites its input with the transformed result, using a temporary copy to avoid data corruption.
+*/
 void CalcUtX(const gsl_matrix *U, gsl_matrix *UtX) {
   gsl_matrix *X = gsl_matrix_safe_alloc(UtX->size1, UtX->size2);
   gsl_matrix_safe_memcpy(X, UtX);
@@ -631,3 +645,36 @@ double UcharToDouble02(const unsigned char c) { return (double)c * 0.01; }
 unsigned char Double02ToUchar(const double dosage) {
   return (int)(dosage * 100);
 }
+
+
+std::tuple<size_t, size_t, size_t> compute_ratio(size_t ni_total, const gsl_vector *gs) {
+  size_t n_0 = 0;
+  size_t n_1 = 0;
+  size_t n_2 = 0;
+  for (size_t i = 0; i < ni_total; ++i) { // read genotypes
+    double geno = gsl_vector_get(gs, i);
+    if (geno >= 0 && geno <= 0.5) {
+      n_0++;
+    }
+    if (geno > 0.5 && geno < 1.5) {
+      n_1++;
+    }
+    if (geno >= 1.5 && geno <= 2.0) {
+      n_2++;
+    }
+  }
+  return {n_0,n_1,n_2};
+}
+
+double compute_maf(size_t ni_total, size_t ni_test, size_t n_miss, const double *gs, const vector<int> &indicator) {
+  double maf = 0.0;
+
+  for (size_t i = 0; i < ni_total; ++i) { // read genotypes
+    if (indicator[i]) {
+      double geno = gs[i];
+      maf += geno; // 0..2 range
+    }
+  }
+  maf /= 2.0 * (double)(ni_test - n_miss); // Assumption is that geno value is between 0 and 2. FIXME
+  return maf;
+}
diff --git a/src/mathfunc.h b/src/mathfunc.h
index 641d0a3..f7c9e6e 100644
--- a/src/mathfunc.h
+++ b/src/mathfunc.h
@@ -24,6 +24,7 @@
 // #include "Eigen/Dense"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
+#include <tuple>
 
 #define CONDITIONED_MAXRATIO 2e+6 // based on http://mathworld.wolfram.com/ConditionNumber.html
 #define EIGEN_MINVALUE 1e-10
@@ -77,4 +78,8 @@ unsigned char Double02ToUchar(const double dosage);
 // void uchar_matrix_get_row(const vector<vector<unsigned char>> &X,
 //                          const size_t i_row, Eigen::VectorXd &x_row);
 
+std::tuple<size_t, size_t, size_t> compute_ratio(size_t ni_total, const gsl_vector *gs);
+double compute_maf(size_t ni_total, size_t ni_test, size_t n_miss, const double *gs, const vector<int> &indicator);
+
+
 #endif
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index eb23900..1f64c3a 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -210,6 +210,31 @@ void MVLMM::WriteFiles() {
 }
 
 // Below are functions for EM algorithm.
+
+/*
+  src/mvlmm.cpp
+> describe `EigenProc`
+
+The EigenProc function performs the following steps:
+
+ 1 Eigen Decomposition of V_e: It computes the eigen decomposition of the matrix V_e to obtain the eigenvectors
+   and eigenvalues. This is used to transform V_e into a diagonal form.
+ 2 Calculate V_e_h and V_e_hi: It calculates the matrices V_e_h and V_e_hi, which are used to transform the
+   original matrices into a space where the eigenvalues are more manageable. V_e_h is a scaled version of the
+   eigenvectors, and V_e_hi is the inverse of V_e_h.
+ 3 Calculate Lambda: It computes the matrix Lambda as V_ehi * V_g * V_ehi, which is a transformed version of
+   V_g in the space defined by the eigenvectors of V_e.
+ 4 Eigen Decomposition of Lambda: It performs an eigen decomposition on Lambda to obtain the eigenvectors and
+   eigenvalues, which are used to further transform the matrices.
+ 5 Calculate UltVeh and UltVehi: It calculates UltVeh and UltVehi, which are used in subsequent calculations to
+   transform vectors and matrices into the space defined by the eigenvectors of Lambda.
+ 6 Return logdet_Ve: The function returns the logarithm of the determinant of V_e, which is used in likelihood
+   calculations.
+
+This function is crucial for transforming the problem into a space where the matrices are diagonal, simplifying
+many calculations in the context of mixed models.
+
+ */
 double EigenProc(const gsl_matrix *V_g, const gsl_matrix *V_e, gsl_vector *D_l,
                  gsl_matrix *UltVeh, gsl_matrix *UltVehi) {
   size_t d_size = V_g->size1;
diff --git a/src/param.cpp b/src/param.cpp
index f96e9c3..96a9cd2 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -2,7 +2,7 @@
     Genome-wide Efficient Mixed Model Association (GEMMA)
     Copyright © 2011-2017, Xiang Zhou
     Copyright © 2017, Peter Carbonetto
-    Copyright © 2017, Pjotr Prins
+    Copyright © 2017-2025, Pjotr Prins
 
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -40,6 +40,7 @@
 #include "mathfunc.h"
 #include "param.h"
 #include "fastblas.h"
+#include "checkpoint.h"
 
 using namespace std;
 
@@ -104,7 +105,9 @@ PARAM::PARAM(void)
       randseed(-1), window_cm(0), window_bp(0), window_ns(0), n_block(200),
       error(false), ni_subsample(0), n_cvt(1), n_cat(0), n_vc(1),
       time_total(0.0), time_G(0.0), time_eigen(0.0), time_UtX(0.0),
-      time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {}
+      time_UtZ(0.0), time_opt(0.0), time_Omega(0.0) {
+    is_mdb = false;
+  }
 
 PARAM::~PARAM() {
   if (gsl_r)
@@ -113,6 +116,7 @@ PARAM::~PARAM() {
 
 // Read files: obtain ns_total, ng_total, ns_test, ni_test.
 void PARAM::ReadFiles(void) {
+  checkpoint_nofile("PARAM::ReadFiles");
   string file_str;
 
   // Read cat file.
@@ -145,22 +149,24 @@ void PARAM::ReadFiles(void) {
     }
   }
 
-  // Read SNP set.
-  if (!file_snps.empty()) {
-    if (ReadFile_snps(file_snps, setSnps) == false) {
-      error = true;
+  if (!is_mdb) {
+    // Read SNP set into setSnps (without filtering)
+    if (!file_snps.empty()) {
+      if (ReadFile_snps(file_snps, setSnps) == false) {
+        error = true;
+      }
+    } else {
+      setSnps.clear();
     }
-  } else {
-    setSnps.clear();
-  }
 
-  // Read KSNP set.
-  if (!file_ksnps.empty()) {
-    if (ReadFile_snps(file_ksnps, setKSnps) == false) {
-      error = true;
+    // Also read KSNP set. Reads all Snps later to be used by GRM.
+    if (!file_ksnps.empty()) {
+      if (ReadFile_snps(file_ksnps, setKSnps) == false) {
+        error = true;
+      }
+    } else {
+      setKSnps.clear();
     }
-  } else {
-    setKSnps.clear();
   }
 
   // For prediction.
@@ -180,13 +186,15 @@ void PARAM::ReadFiles(void) {
       }
     }
 
+
     if (!file_geno.empty()) {
-      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) ==
-          false) {
+      // --- Read (multi-)column phenotypes into pheno and set the NA indicator
+      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
         error = true;
       }
 
-      if (CountFileLines(file_geno, ns_total) == false) {
+      // --- compute ns_total by readin geno file
+      if (!is_mdb && is_check_mode() && CountFileLines(file_geno, ns_total) == false) {
         error = true;
       }
     }
@@ -215,8 +223,6 @@ void PARAM::ReadFiles(void) {
       indicator_idv.push_back(k);
     }
 
-    ns_test = 0;
-
     return;
   }
 
@@ -273,7 +279,7 @@ void PARAM::ReadFiles(void) {
 
     // Post-process covariates and phenotypes, obtain
     // ni_test, save all useful covariates.
-    ProcessCvtPhen();
+    ProcessInclusionIndicators();
 
     // Obtain covariate matrix.
     auto W1 = gsl_matrix_safe_alloc(ni_test, n_cvt);
@@ -290,7 +296,7 @@ void PARAM::ReadFiles(void) {
   }
 
   // Read genotype and phenotype file for BIMBAM format.
-  if (!file_geno.empty()) {
+  if (!is_mdb && !file_geno.empty()) {
 
     // Annotation file before genotype file.
     if (!file_anno.empty()) {
@@ -299,14 +305,14 @@ void PARAM::ReadFiles(void) {
       }
     }
 
-    // Phenotype file before genotype file.
+    // Phenotype file before genotype file. Already done this(?!)
     if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
       error = true;
     }
 
     // Post-process covariates and phenotypes, obtain
     // ni_test, save all useful covariates.
-    ProcessCvtPhen();
+    ProcessInclusionIndicators();
 
     // Obtain covariate matrix.
     auto W2 = gsl_matrix_safe_alloc(ni_test, n_cvt);
@@ -314,9 +320,10 @@ void PARAM::ReadFiles(void) {
 
     trim_individuals(indicator_idv, ni_max);
     trim_individuals(indicator_cvt, ni_max);
-    if (ReadFile_geno(file_geno, setSnps, W2, indicator_idv, indicator_snp,
-                      maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
-                      mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
+    // The following reads the geno file to get the SNPs - only for BIMBAM
+    if (ReadFile_bimbam_geno(file_geno, setSnps, W2, indicator_idv, indicator_snp,
+                                          maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
+                                          mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
       error = true;
       return;
     }
@@ -324,6 +331,17 @@ void PARAM::ReadFiles(void) {
 
     ns_total = indicator_snp.size();
   }
+  // lmdb-type genotype file:
+  if (is_mdb && !file_geno.empty()) {
+    if (!file_kin.empty()) { // GWA
+      // Phenotype file before genotype file. Already done this(?!)
+      if (ReadFile_pheno(file_pheno, indicator_pheno, pheno, p_column) == false) {
+        error = true;
+      }
+      ProcessInclusionIndicators();
+      ns_total = indicator_snp.size();
+    }
+  }
 
   // Read genotype file for multiple PLINK files.
   if (!file_mbfile.empty()) {
@@ -360,7 +378,7 @@ void PARAM::ReadFiles(void) {
 
         // Post-process covariates and phenotypes, obtain
         // ni_test, save all useful covariates.
-        ProcessCvtPhen();
+        ProcessInclusionIndicators();
 
         // Obtain covariate matrix.
         W3 = gsl_matrix_safe_alloc(ni_test, n_cvt);
@@ -387,8 +405,8 @@ void PARAM::ReadFiles(void) {
     infile.clear();
   }
 
-  // Read genotype and phenotype file for multiple BIMBAM files.
-  if (!file_mgeno.empty()) {
+  // Read genotype and phenotype file for multiple BIMBAM files. Note this goes untested.
+  if (!is_mdb && !file_mgeno.empty()) {
 
     // Annotation file before genotype file.
     if (!file_anno.empty()) {
@@ -404,7 +422,7 @@ void PARAM::ReadFiles(void) {
 
     // Post-process covariates and phenotypes, obtain ni_test,
     // save all useful covariates.
-    ProcessCvtPhen();
+    ProcessInclusionIndicators();
 
     // Obtain covariate matrix.
     gsl_matrix *W4 = gsl_matrix_safe_alloc(ni_test, n_cvt);
@@ -420,7 +438,7 @@ void PARAM::ReadFiles(void) {
     string file_name;
     size_t ns_test_tmp;
     while (!safeGetline(infile, file_name).eof()) {
-      if (ReadFile_geno(file_name, setSnps, W4, indicator_idv, indicator_snp,
+      if (ReadFile_bimbam_geno(file_name, setSnps, W4, indicator_idv, indicator_snp,
                         maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
                         mapRS2bp, mapRS2cM, snpInfo, ns_test_tmp) == false) {
         error = true;
@@ -457,7 +475,7 @@ void PARAM::ReadFiles(void) {
 
     // Post-process covariates and phenotypes, obtain
     // ni_test, save all useful covariates.
-    ProcessCvtPhen();
+    ProcessInclusionIndicators();
 
     // Obtain covariate matrix.
     // gsl_matrix *W5 = gsl_matrix_alloc(ni_test, n_cvt);
@@ -480,7 +498,8 @@ void PARAM::ReadFiles(void) {
       ni_test += indicator_idv[i];
     }
 
-    enforce_msg(ni_test,"number of analyzed individuals equals 0.");
+    if (!is_mdb)
+      enforce_msg(ni_test,"number of analyzed individuals equals 0.");
   }
 
   // For ridge prediction, read phenotype only.
@@ -491,7 +510,7 @@ void PARAM::ReadFiles(void) {
 
     // Post-process covariates and phenotypes, obtain
     // ni_test, save all useful covariates.
-    ProcessCvtPhen();
+    ProcessInclusionIndicators();
   }
 
   // Compute setKSnps when -loco is passed in
@@ -920,13 +939,18 @@ void PARAM::CheckParam(void) {
   enforce_fexists(file_gwasnps, "open file");
   enforce_fexists(file_anno, "open file");
 
+  if (file_geno.find(".mdb") != string::npos) {
+    is_mdb = true;
+  }
+
   if (!loco.empty()) {
     enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 9 || a_mode == 21 || a_mode == 22,
                 "LOCO only works with LMM and K");
     // enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
     enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)");
-    enforce_msg(!file_anno.empty(),
-                "LOCO requires annotation file (-a switch)");
+    if (!is_mdb)
+      enforce_msg(!file_anno.empty(), "Without mdb LOCO requires annotation file (-a switch)");
+
     enforce_msg(file_ksnps.empty(), "LOCO does not allow -ksnps switch");
     enforce_msg(file_gwasnps.empty(), "LOCO does not allow -gwasnps switch");
   }
@@ -1078,13 +1102,15 @@ void PARAM::CheckData(void) {
     }
   }
 
-  enforce_msg(ni_test,"number of analyzed individuals equals 0.");
+  if (!is_mdb) {
+    enforce_msg(ni_test,"number of analyzed individuals equals 0.");
 
-  if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() &&
-      file_study.empty() && file_beta.empty() && file_bf.empty()) {
-    error = true;
-    cout << "error! number of analyzed individuals equals 0. " << endl;
-    return;
+    if (ni_test == 0 && file_cor.empty() && file_mstudy.empty() &&
+        file_study.empty() && file_beta.empty() && file_bf.empty()) {
+      error = true;
+      cout << "error! number of analyzed individuals equals 0. " << endl;
+      return;
+    }
   }
 
   if (a_mode == 43) {
@@ -1103,7 +1129,7 @@ void PARAM::CheckData(void) {
   }
 
   // Output some information.
-  if (file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
+  if (!is_mdb && file_cor.empty() && file_mstudy.empty() && file_study.empty() &&
       a_mode != 15 && a_mode != 27 && a_mode != 28) {
     cout << "## number of total individuals = " << ni_total << endl;
     if (a_mode == 43) {
@@ -1251,14 +1277,14 @@ void PARAM::CheckData(void) {
 
 void PARAM::PrintSummary() {
   if (n_ph == 1) {
-    cout << "pve estimate =" << pve_null << endl;
-    cout << "se(pve) =" << pve_se_null << endl;
+    cout << "##   pve estimate = " << pve_null << endl;
+    cout << "##   se(pve) = " << pve_se_null << endl;
   } else {
   }
   return;
 }
 
-void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
+void PARAM::ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
   string file_str;
 
   if (!file_bfile.empty()) {
@@ -1268,8 +1294,7 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
       error = true;
     }
   } else {
-    if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K,
-                      calc_K) == false) {
+      if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, UtX, K, calc_K) == false) {
       error = true;
     }
   }
@@ -1277,44 +1302,25 @@ void PARAM::ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K) {
   return;
 }
 
-void PARAM::ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                          const bool calc_K) {
-  string file_str;
-
-  if (!file_bfile.empty()) {
-    file_str = file_bfile + ".bed";
-    if (ReadFile_bed(file_str, indicator_idv, indicator_snp, Xt, K, calc_K,
-                     ni_test, ns_test) == false) {
-      error = true;
-    }
-  } else {
-    if (ReadFile_geno(file_geno, indicator_idv, indicator_snp, Xt, K, calc_K,
-                      ni_test, ns_test) == false) {
-      error = true;
-    }
-  }
-
-  return;
+gsl_matrix *PARAM::MdbCalcKin() {
+  return mdb_calc_kin(file_geno, a_mode - 20, loco);
 }
 
 void PARAM::CalcKin(gsl_matrix *matrix_kin) {
+  checkpoint_nofile("PARAM::CalcKin");
   string file_str;
-
   gsl_matrix_set_zero(matrix_kin);
 
   if (!file_bfile.empty()) {
     file_str = file_bfile + ".bed";
-    // enforce_msg(loco.empty(), "FIXME: LOCO nyi");
     if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
         false) {
       error = true;
     }
   } else {
-    file_str = file_geno;
-    if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace,
-                  matrix_kin, ni_max == 0) == false) {
-      error = true;
-    }
+    // indicator_snp is an array of bool
+    error = !BimbamKin(file_geno, setKSnps, indicator_snp, a_mode - 20, d_pace,
+                       matrix_kin, ni_max == 0);
   }
 
   return;
@@ -1990,7 +1996,7 @@ void PARAM::CheckCvt() {
 }
 
 // Post-process phenotypes and covariates.
-void PARAM::ProcessCvtPhen() {
+void PARAM::ProcessInclusionIndicators() {
 
   // Convert indicator_pheno to indicator_idv.
   int k = 1;
@@ -2028,11 +2034,8 @@ void PARAM::ProcessCvtPhen() {
 
   // Obtain ni_test.
   ni_test = 0;
-  for (vector<int>::size_type i = 0; i < (indicator_idv).size(); ++i) {
-    if (indicator_idv[i] == 0) {
-      continue;
-    }
-    ni_test++;
+  for(auto &i : indicator_idv) {
+    ni_test += indicator_idv[i];
   }
 
   // If subsample number is set, perform a random sub-sampling
@@ -2071,13 +2074,14 @@ void PARAM::ProcessCvtPhen() {
   }
 
   // Check ni_test.
-  if (a_mode != M_BSLMM5)
-    enforce_msg(ni_test,"number of analyzed individuals equals 0.");
-  if (ni_test == 0 && a_mode != M_BSLMM5) {
-    error = true;
-    cout << "error! number of analyzed individuals equals 0. " << endl;
+  if (!is_mdb) {
+    if (a_mode != M_BSLMM5)
+      enforce_msg(ni_test,"number of analyzed individuals equals 0.");
+    if (ni_test == 0 && a_mode != M_BSLMM5) {
+      error = true;
+      cout << "error! number of analyzed individuals equals 0. " << endl;
+    }
   }
-
   // Check covariates to see if they are correlated with each
   // other, and to see if the intercept term is included.
   // After getting ni_test.
diff --git a/src/param.h b/src/param.h
index d3ce686..046f543 100644
--- a/src/param.h
+++ b/src/param.h
@@ -160,6 +160,8 @@ public:
   string file_ksnps;   // File SNPs for computing K
   string file_gwasnps; // File SNPs for computing GWAS
 
+  bool is_mdb;
+
   // QC-related parameters.
   double miss_level;
   double maf_level;
@@ -336,17 +338,16 @@ public:
   void CheckParam();
   void CheckData();
   void PrintSummary();
-  void ReadGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
-  void ReadGenotypes(vector<vector<unsigned char>> &Xt, gsl_matrix *K,
-                     const bool calc_K);
+  void ReadBIMBAMGenotypes(gsl_matrix *UtX, gsl_matrix *K, const bool calc_K);
   void CheckCvt();
   void CopyCvt(gsl_matrix *W);
   void CopyA(size_t flag, gsl_matrix *A);
   void CopyGxe(gsl_vector *gxe);
   void CopyWeight(gsl_vector *w);
-  void ProcessCvtPhen();
+  void ProcessInclusionIndicators();
   void CopyCvtPhen(gsl_matrix *W, gsl_vector *y, size_t flag);
   void CopyCvtPhen(gsl_matrix *W, gsl_matrix *Y, size_t flag);
+  gsl_matrix *MdbCalcKin();
   void CalcKin(gsl_matrix *matrix_kin);
   void CalcS(const map<string, double> &mapRS2wA,
              const map<string, double> &mapRS2wK, const gsl_matrix *W,
@@ -368,6 +369,9 @@ public:
                    const map<string, double> &mapRS2z, gsl_vector *w,
                    gsl_vector *z, vector<size_t> &vec_cat);
   void UpdateSNP(const map<string, double> &mapRS2wA);
+
+  inline bool is_loco() { return !loco.empty(); }
+
 };
 
 size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
diff --git a/src/vc.cpp b/src/vc.cpp
index 22aaea9..970c556 100644
--- a/src/vc.cpp
+++ b/src/vc.cpp
@@ -1021,7 +1021,7 @@ void ReadFile_cor(const string &file_cor, const vector<string> &vec_rs,
 
   string rs1, rs2;
   double d1, d2, d3, cor, var1, var2;
-  size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin;
+  size_t n_nb, nsamp1, nsamp2, n12, bin_size = 10, bin = 0;
 
   vector<vector<double>> mat_S, mat_Svar, mat_tmp;
   vector<double> vec_qvar, vec_tmp;
diff --git a/src/version.h b/src/version.h
index 3382003..02d4c09 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1,5 +1,4 @@
 // version.h generated by GEMMA scripts/gen_version_info.sh
-#define GEMMA_VERSION "0.0.1"
-#define GEMMA_DATE "2025-01-04"
+#define GEMMA_VERSION "1.0.0"
+#define GEMMA_DATE "2025-11-22"
 #define GEMMA_YEAR "2025"
-#define GEMMA_PROFILE "/gnu/store/ln160n2kzn791jwgv36yrxlxygjwl9hh-profile"