about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/debug.h46
-rw-r--r--src/gemma.cpp49
-rw-r--r--src/io.cpp117
-rw-r--r--src/io.h29
-rw-r--r--src/lmm.cpp203
-rw-r--r--src/lmm.h6
-rw-r--r--src/mvlmm.cpp6
-rw-r--r--src/param.cpp146
-rw-r--r--src/param.h30
-rw-r--r--src/vc.h2
10 files changed, 431 insertions, 203 deletions
diff --git a/src/debug.h b/src/debug.h
new file mode 100644
index 0000000..84637e1
--- /dev/null
+++ b/src/debug.h
@@ -0,0 +1,46 @@
+#ifndef __DEBUG_H__
+#define __DEBUG_H__
+
+// enforce works like assert but also when NDEBUG is set (i.e., it
+// always works). enforce_msg prints message instead of expr
+
+#if defined NDEBUG
+#define debug_msg(msg)
+#else
+#define debug_msg(msg) cout << "**** DEBUG: " << msg << endl;
+#endif
+
+/* This prints an "Assertion failed" message and aborts.  */
+extern "C" void __assert_fail(const char *__assertion, const char *__file,
+                              unsigned int __line,
+                              const char *__function) __THROW
+    __attribute__((__noreturn__));
+
+#define __ASSERT_FUNCTION __PRETTY_FUNCTION__
+
+#define enforce(expr)                                                          \
+  ((expr)                                                                      \
+       ? __ASSERT_VOID_CAST(0)                                                 \
+       : __assert_fail(__STRING(expr), __FILE__, __LINE__, __ASSERT_FUNCTION))
+
+#define enforce_msg(expr, msg)                                                 \
+  ((expr) ? __ASSERT_VOID_CAST(0)                                              \
+          : __assert_fail(msg, __FILE__, __LINE__, __ASSERT_FUNCTION))
+
+#define enforce_str(expr, msg)                                                 \
+  ((expr)                                                                      \
+       ? __ASSERT_VOID_CAST(0)                                                 \
+       : __assert_fail((msg).c_str(), __FILE__, __LINE__, __ASSERT_FUNCTION))
+
+// Helpers to create a unique varname per MACRO
+#define COMBINE1(X, Y) X##Y
+#define COMBINE(X, Y) COMBINE1(X, Y)
+
+#define enforce_gsl(expr)                                                      \
+  auto COMBINE(res, __LINE__) = (expr);                                        \
+  (COMBINE(res, __LINE__) == 0                                                 \
+       ? __ASSERT_VOID_CAST(0)                                                 \
+       : __assert_fail(gsl_strerror(COMBINE(res, __LINE__)), __FILE__,         \
+                       __LINE__, __ASSERT_FUNCTION))
+
+#endif
diff --git a/src/gemma.cpp b/src/gemma.cpp
index c72475b..0fb8c54 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -151,6 +151,7 @@ void GEMMA::PrintHelp(size_t option) {
     cout << " 11: obtain predicted values" << endl;
     cout << " 12: calculate snp variance covariance" << endl;
     cout << " 13: note" << endl;
+    cout << " 14: debug options" << endl;
     cout << endl;
   }
 
@@ -446,7 +447,16 @@ void GEMMA::PrintHelp(size_t option) {
   }
 
   if (option == 4) {
-    cout << " RELATEDNESS MATRIX CALCULATION OPTIONS" << endl;
+    cout << " RELATEDNESS MATRIX (K) CALCULATION OPTIONS" << endl;
+    cout << " -ksnps    [filename]     "
+         << " specify input snps file name to compute K" << endl;
+    cout << " -loco     [chr]          "
+         << " leave one chromosome out (LOCO) by name (requires -a annotation "
+            "file)"
+         << endl;
+    cout << " -a        [filename]     "
+         << " specify input BIMBAM SNP annotation file name (LOCO only)"
+         << endl;
     cout << " -gk       [num]          "
          << " specify which type of kinship/relatedness matrix to generate "
             "(default 1)"
@@ -682,6 +692,14 @@ void GEMMA::PrintHelp(size_t option) {
     cout << endl;
   }
 
+  if (option == 14) {
+    cout << " DEBUG OPTIONS" << endl;
+    cout << " -silence                 silent terminal display" << endl;
+    cout << " -debug                   debug output" << endl;
+    cout << " -nind       [num]        read up to num individuals" << endl;
+    cout << endl;
+  }
+
   return;
 }
 
@@ -1008,7 +1026,7 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       str.clear();
       str.assign(argv[i]);
       cPar.k_mode = atoi(str.c_str());
-    } else if (strcmp(argv[i], "-n") == 0) {
+    } else if (strcmp(argv[i], "-n") == 0) { // set pheno column (range)
       (cPar.p_column).clear();
       while (argv[i + 1] != NULL && argv[i + 1][0] != '-') {
         ++i;
@@ -1076,6 +1094,12 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       cPar.r2_level = atof(str.c_str());
     } else if (strcmp(argv[i], "-notsnp") == 0) {
       cPar.maf_level = -1;
+    } else if (strcmp(argv[i], "-loco") == 0) {
+      assert(argv[i + 1]);
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.loco = str;
     } else if (strcmp(argv[i], "-gk") == 0) {
       if (cPar.a_mode != 0) {
         cPar.error = true;
@@ -1315,6 +1339,14 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       str.clear();
       str.assign(argv[i]);
       cPar.nr_iter = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-nind") == 0) {
+      if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
+        continue;
+      }
+      ++i;
+      str.clear();
+      str.assign(argv[i]);
+      cPar.ni_max = atoi(str.c_str()); // for testing purposes
     } else if (strcmp(argv[i], "-emp") == 0) {
       if (argv[i + 1] == NULL || argv[i + 1][0] == '-') {
         continue;
@@ -1533,6 +1565,8 @@ void GEMMA::Assign(int argc, char **argv, PARAM &cPar) {
       str.clear();
       str.assign(argv[i]);
       cPar.window_ns = atoi(str.c_str());
+    } else if (strcmp(argv[i], "-debug") == 0) {
+      cPar.mode_debug = true;
     } else {
       cout << "error! unrecognized option: " << argv[i] << endl;
       cPar.error = true;
@@ -1808,14 +1842,16 @@ void GEMMA::BatchRun(PARAM &cPar) {
     gsl_matrix_free(H_full);
   }
 
-  // Generate Kinship matrix
+  // Generate Kinship matrix (optionally using LOCO)
   if (cPar.a_mode == 21 || cPar.a_mode == 22) {
     cout << "Calculating Relatedness Matrix ... " << endl;
 
     gsl_matrix *G = gsl_matrix_alloc(cPar.ni_total, cPar.ni_total);
+    enforce_msg(G, "allocate G"); // just to be sure
 
     time_start = clock();
     cPar.CalcKin(G);
+
     cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
     if (cPar.error == true) {
       cout << "error! fail to calculate relatedness matrix. " << endl;
@@ -2613,6 +2649,7 @@ return;}
       cPar.a_mode == 4 || cPar.a_mode == 5 ||
       cPar.a_mode == 31) { // Fit LMM or mvLMM or eigen
     gsl_matrix *Y = gsl_matrix_alloc(cPar.ni_test, cPar.n_ph);
+    enforce_msg(Y, "allocate Y"); // just to be sure
     gsl_matrix *W = gsl_matrix_alloc(Y->size1, cPar.n_cvt);
     gsl_matrix *B = gsl_matrix_alloc(Y->size2, W->size2); // B is a d by c
                                                           // matrix
@@ -2749,7 +2786,7 @@ return;}
       CalcUtX(U, Y, UtY);
 
       // calculate REMLE/MLE estimate and pve for univariate model
-      if (cPar.n_ph == 1) {
+      if (cPar.n_ph == 1) { // one phenotype
         gsl_vector_view beta = gsl_matrix_row(B, 0);
         gsl_vector_view se_beta = gsl_matrix_row(se_B, 0);
         gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
@@ -2819,7 +2856,7 @@ return;}
         }
       }
 
-      // Fit LMM or mvLMM
+      // Fit LMM or mvLMM (w. LOCO)
       if (cPar.a_mode == 1 || cPar.a_mode == 2 || cPar.a_mode == 3 ||
           cPar.a_mode == 4) {
         if (cPar.n_ph == 1) {
@@ -2844,7 +2881,7 @@ return;}
           } else {
             if (cPar.file_gxe.empty()) {
               cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
-                                 &Y_col.vector);
+                                 &Y_col.vector, cPar.setGWASnps);
             } else {
               cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
                                     &Y_col.vector, env);
diff --git a/src/io.cpp b/src/io.cpp
index 44251aa..79e753a 100644
--- a/src/io.cpp
+++ b/src/io.cpp
@@ -25,6 +25,7 @@
 #include <iomanip>
 #include <iostream>
 #include <map>
+#include <regex>
 #include <set>
 #include <sstream>
 #include <stdio.h>
@@ -132,7 +133,7 @@ std::istream &safeGetline(std::istream &is, std::string &t) {
 }
 
 // Read SNP file.
-bool ReadFile_snps(const string &file_snps, set<string> &setSnps) {
+bool ReadFile_snps(const string file_snps, set<string> &setSnps) {
   setSnps.clear();
 
   igzstream infile(file_snps.c_str(), igzstream::in);
@@ -654,6 +655,7 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     major = ch_ptr;
 
     if (setSnps.size() != 0 && setSnps.count(rs) == 0) {
+      // if SNP in geno but not in -snps we add an missing value
       SNPINFO sInfo = {"-9", rs, -9, -9, minor, major,
                        0,    -9, -9, 0,  0,     file_pos};
       snpInfo.push_back(sInfo);
@@ -684,9 +686,8 @@ bool ReadFile_geno(const string &file_geno, const set<string> &setSnps,
     gsl_vector_set_zero(genotype_miss);
     for (int i = 0; i < ni_total; ++i) {
       ch_ptr = strtok(NULL, " , \t");
-      if (indicator_idv[i] == 0) {
+      if (indicator_idv[i] == 0)
         continue;
-      }
 
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(genotype_miss, c_idv, 1);
@@ -1334,55 +1335,78 @@ void ReadFile_eigenD(const string &file_kd, bool &error, gsl_vector *eval) {
 }
 
 // Read bimbam mean genotype file and calculate kinship matrix.
-bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
-               const int k_mode, const int display_pace,
-               gsl_matrix *matrix_kin) {
+bool BimbamKin(const string file_geno, const set<string> ksnps,
+               vector<int> &indicator_snp, const int k_mode,
+               const int display_pace, gsl_matrix *matrix_kin,
+               const bool test_nind) {
   igzstream infile(file_geno.c_str(), igzstream::in);
-  if (!infile) {
-    cout << "error reading genotype file:" << file_geno << endl;
-    return false;
-  }
-
-  string line;
-  char *ch_ptr;
+  enforce_msg(infile, "error reading genotype file");
 
   size_t n_miss;
   double d, geno_mean, geno_var;
 
+  // setKSnp and/or LOCO support
+  bool process_ksnps = ksnps.size();
+
   size_t ni_total = matrix_kin->size1;
   gsl_vector *geno = gsl_vector_alloc(ni_total);
   gsl_vector *geno_miss = gsl_vector_alloc(ni_total);
 
-  // Create a large matrix.
-  size_t msize = 10000;
+  // Xlarge contains inds x markers
+  const size_t msize = K_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
+  enforce_msg(Xlarge, "allocate Xlarge");
+
   gsl_matrix_set_zero(Xlarge);
 
+  // For every SNP read the genotype per individual
   size_t ns_test = 0;
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
+    string line;
     !safeGetline(infile, line).eof();
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
     }
-    if (indicator_snp[t] == 0) {
+    if (indicator_snp[t] == 0)
       continue;
-    }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
-    ch_ptr = strtok(NULL, " , \t");
-    ch_ptr = strtok(NULL, " , \t");
+    std::regex_token_iterator<std::string::iterator> rend;
+    regex split_on("[,[:blank:]]+");
+    regex_token_iterator<string::iterator> tokens(line.begin(), line.end(),
+                                                  split_on, -1);
+    if (test_nind) {
+      // ascertain the number of genotype fields match
+      uint token_num = 0;
+      for (auto x = tokens; x != rend; x++)
+        token_num++;
+      enforce_str(token_num == ni_total + 3, line + " count fields");
+    }
+
+    auto snp = *tokens; // first field
+    // check whether SNP is included in ksnps (used by LOCO)
+    if (process_ksnps && ksnps.count(snp) == 0)
+      continue;
+
+    tokens++; // skip nucleotide fields
+    tokens++; // skip nucleotide fields
 
+    // calc SNP stats
     geno_mean = 0.0;
     n_miss = 0;
     geno_var = 0.0;
     gsl_vector_set_all(geno_miss, 0);
     for (size_t i = 0; i < ni_total; ++i) {
-      ch_ptr = strtok(NULL, " , \t");
-      if (strcmp(ch_ptr, "NA") == 0) {
+      tokens++;
+      enforce_str(tokens != rend, line + " number of fields");
+      string field = *tokens;
+      if (field == "NA") {
         gsl_vector_set(geno_miss, i, 0);
         n_miss++;
       } else {
-        d = atof(ch_ptr);
+        d = stod(field);
+        // make sure genotype field contains a number
+        if (field != "0" && field != "0.0")
+          enforce_str(d != 0.0f, field);
         gsl_vector_set(geno, i, d);
         gsl_vector_set(geno_miss, i, 1);
         geno_mean += d;
@@ -1406,23 +1430,28 @@ bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
     if (k_mode == 2 && geno_var != 0) {
       gsl_vector_scale(geno, 1.0 / sqrt(geno_var));
     }
+    // set the SNP column ns_test
     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, ns_test % msize);
-    gsl_vector_memcpy(&Xlarge_col.vector, geno);
+    enforce_gsl(gsl_vector_memcpy(&Xlarge_col.vector, geno));
 
     ns_test++;
 
+    // compute kinship matrix and return in matrix_kin a SNP at a time
     if (ns_test % msize == 0) {
       eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
       gsl_matrix_set_zero(Xlarge);
     }
   }
-
   if (ns_test % msize != 0) {
     eigenlib_dgemm("N", "T", 1.0, Xlarge, Xlarge, 1.0, matrix_kin);
   }
   cout << endl;
 
-  gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test);
+  // scale the kinship matrix
+  enforce_gsl(gsl_matrix_scale(matrix_kin, 1.0 / (double)ns_test));
+
+  // and transpose
+  // FIXME: the following is very slow
 
   for (size_t i = 0; i < ni_total; ++i) {
     for (size_t j = 0; j < i; ++j) {
@@ -1430,6 +1459,8 @@ bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
       gsl_matrix_set(matrix_kin, i, j, d);
     }
   }
+  // GSL is faster - and there are even faster methods
+  // enforce_gsl(gsl_matrix_transpose(matrix_kin));
 
   gsl_vector_free(geno);
   gsl_vector_free(geno_miss);
@@ -1463,7 +1494,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
   int n_bit;
 
   // Create a large matrix.
-  size_t msize = 10000;
+  const size_t msize = K_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(ni_total, msize);
   gsl_matrix_set_zero(Xlarge);
 
@@ -1583,7 +1614,7 @@ bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
 
 // Read bimbam mean genotype file, the second time, recode "mean"
 // genotype and calculate K.
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                    const bool calc_K) {
   igzstream infile(file_geno.c_str(), igzstream::in);
@@ -3326,13 +3357,14 @@ bool ReadFile_mcat(const string &file_mcat, map<string, size_t> &mapRS2cat,
 // Read bimbam mean genotype file and calculate kinship matrix; this
 // time, the kinship matrix is not centered, and can contain multiple
 // K matrix.
-bool BimbamKin(const string &file_geno, const int display_pace,
-               const vector<int> &indicator_idv,
-               const vector<int> &indicator_snp,
-               const map<string, double> &mapRS2weight,
-               const map<string, size_t> &mapRS2cat,
-               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
-               gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
+bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
+                         const int display_pace,
+                         const vector<int> &indicator_idv,
+                         const vector<int> &indicator_snp,
+                         const map<string, double> &mapRS2weight,
+                         const map<string, size_t> &mapRS2cat,
+                         const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+                         gsl_matrix *matrix_kin, gsl_vector *vector_ns) {
   igzstream infile(file_geno.c_str(), igzstream::in);
   if (!infile) {
     cout << "error reading genotype file:" << file_geno << endl;
@@ -3368,7 +3400,7 @@ bool BimbamKin(const string &file_geno, const int display_pace,
   }
 
   // Create a large matrix.
-  size_t msize = 10000;
+  const size_t msize = K_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
   gsl_matrix_set_zero(Xlarge);
 
@@ -3378,9 +3410,8 @@ bool BimbamKin(const string &file_geno, const int display_pace,
     if (t % display_pace == 0 || t == (indicator_snp.size() - 1)) {
       ProgressBar("Reading SNPs  ", t, indicator_snp.size() - 1);
     }
-    if (indicator_snp[t] == 0) {
+    if (indicator_snp[t] == 0)
       continue;
-    }
 
     ch_ptr = strtok((char *)line.c_str(), " , \t");
     ch_ptr = strtok(NULL, " , \t");
@@ -3562,7 +3593,7 @@ bool PlinkKin(const string &file_bed, const int display_pace,
   }
 
   // Create a large matrix.
-  size_t msize = 10000;
+  const size_t msize = K_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(ni_test, msize * n_vc);
   gsl_matrix_set_zero(Xlarge);
 
@@ -3742,7 +3773,8 @@ bool PlinkKin(const string &file_bed, const int display_pace,
 }
 
 bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
-              const int display_pace, const vector<int> &indicator_idv,
+              const set<string> setKSnps, const int display_pace,
+              const vector<int> &indicator_idv,
               const vector<vector<int>> &mindicator_snp,
               const map<string, double> &mapRS2weight,
               const map<string, size_t> &mapRS2cat,
@@ -3774,8 +3806,9 @@ bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
       PlinkKin(file_name, display_pace, indicator_idv, mindicator_snp[l],
                mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
     } else {
-      BimbamKin(file_name, display_pace, indicator_idv, mindicator_snp[l],
-                mapRS2weight, mapRS2cat, msnpInfo[l], W, kin_tmp, ns_tmp);
+      BimbamKinUncentered(file_name, setKSnps, display_pace, indicator_idv,
+                          mindicator_snp[l], mapRS2weight, mapRS2cat,
+                          msnpInfo[l], W, kin_tmp, ns_tmp);
     }
 
     // Add ns.
diff --git a/src/io.h b/src/io.h
index 3e1145a..27f145f 100644
--- a/src/io.h
+++ b/src/io.h
@@ -34,7 +34,7 @@ void ProgressBar(string str, double p, double total);
 void ProgressBar(string str, double p, double total, double ratio);
 std::istream &safeGetline(std::istream &is, std::string &t);
 
-bool ReadFile_snps(const string &file_snps, set<string> &setSnps);
+bool ReadFile_snps(const string file_snps, set<string> &setSnps);
 bool ReadFile_snps_header(const string &file_snps, set<string> &setSnps);
 bool ReadFile_log(const string &file_log, double &pheno_mean);
 
@@ -83,13 +83,14 @@ void ReadFile_mk(const string &file_mk, vector<int> &indicator_idv,
 void ReadFile_eigenU(const string &file_u, bool &error, gsl_matrix *U);
 void ReadFile_eigenD(const string &file_d, bool &error, gsl_vector *eval);
 
-bool BimbamKin(const string &file_geno, vector<int> &indicator_snp,
-               const int k_mode, const int display_pace,
-               gsl_matrix *matrix_kin);
+bool BimbamKin(const string file_geno, const set<string> ksnps,
+               vector<int> &indicator_snp, const int k_mode,
+               const int display_pace, gsl_matrix *matrix_kin,
+               const bool test_nind);
 bool PlinkKin(const string &file_bed, vector<int> &indicator_snp,
               const int k_mode, const int display_pace, gsl_matrix *matrix_kin);
 
-bool ReadFile_geno(const string &file_geno, vector<int> &indicator_idv,
+bool ReadFile_geno(const string file_geno, vector<int> &indicator_idv,
                    vector<int> &indicator_snp, gsl_matrix *UtX, gsl_matrix *K,
                    const bool calc_K);
 bool ReadFile_bed(const string &file_bed, vector<int> &indicator_idv,
@@ -124,13 +125,14 @@ bool ReadFile_catc(const string &file_cat,
 bool ReadFile_mcatc(const string &file_mcat,
                     map<string, vector<double>> &mapRS2catc, size_t &n_cat);
 
-bool BimbamKin(const string &file_geno, const int display_pace,
-               const vector<int> &indicator_idv,
-               const vector<int> &indicator_snp,
-               const map<string, double> &mapRS2weight,
-               const map<string, size_t> &mapRS2cat,
-               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
-               gsl_matrix *matrix_kin, gsl_vector *vector_ns);
+bool BimbamKinUncentered(const string &file_geno, const set<string> ksnps,
+                         const int display_pace,
+                         const vector<int> &indicator_idv,
+                         const vector<int> &indicator_snp,
+                         const map<string, double> &mapRS2weight,
+                         const map<string, size_t> &mapRS2cat,
+                         const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
+                         gsl_matrix *matrix_kin, gsl_vector *vector_ns);
 bool PlinkKin(const string &file_bed, const int display_pace,
               const vector<int> &indicator_idv,
               const vector<int> &indicator_snp,
@@ -139,7 +141,8 @@ bool PlinkKin(const string &file_bed, const int display_pace,
               const vector<SNPINFO> &snpInfo, const gsl_matrix *W,
               gsl_matrix *matrix_kin, gsl_vector *vector_ns);
 bool MFILEKin(const size_t mfile_mode, const string &file_mfile,
-              const int display_pace, const vector<int> &indicator_idv,
+              const set<string> setKSnps, const int display_pace,
+              const vector<int> &indicator_idv,
               const vector<vector<int>> &mindicator_snp,
               const map<string, double> &mapRS2weight,
               const map<string, size_t> &mapRS2cat,
diff --git a/src/lmm.cpp b/src/lmm.cpp
index 3f51073..eb76265 100644
--- a/src/lmm.cpp
+++ b/src/lmm.cpp
@@ -80,6 +80,7 @@ void LMM::CopyFromParam(PARAM &cPar) {
   indicator_idv = cPar.indicator_idv;
   indicator_snp = cPar.indicator_snp;
   snpInfo = cPar.snpInfo;
+  setGWASnps = cPar.setGWASnps;
 
   return;
 }
@@ -165,6 +166,7 @@ void LMM::WriteFiles() {
       }
     }
   } else {
+    bool process_gwasnps = setGWASnps.size();
     outfile << "chr"
             << "\t"
             << "rs"
@@ -217,9 +219,13 @@ void LMM::WriteFiles() {
 
     size_t t = 0;
     for (size_t i = 0; i < snpInfo.size(); ++i) {
-      if (indicator_snp[i] == 0) {
+
+      if (indicator_snp[i] == 0)
         continue;
-      }
+      auto snp = snpInfo[i].rs_number;
+      if (process_gwasnps && setGWASnps.count(snp) == 0)
+        continue;
+      // cout << t << endl;
 
       outfile << snpInfo[i].chr << "\t" << snpInfo[i].rs_number << "\t"
               << snpInfo[i].base_position << "\t" << snpInfo[i].n_miss << "\t"
@@ -1311,78 +1317,126 @@ void LMM::AnalyzeGene(const gsl_matrix *U, const gsl_vector *eval,
 
 void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
                         const gsl_matrix *UtW, const gsl_vector *Uty,
-                        const gsl_matrix *W, const gsl_vector *y) {
-  igzstream infile(file_geno.c_str(), igzstream::in);
-  if (!infile) {
-    cout << "error reading genotype file:" << file_geno << endl;
-    return;
-  }
-
+                        const gsl_matrix *W, const gsl_vector *y,
+                        const set<string> gwasnps) {
   clock_t time_start = clock();
 
-  string line;
-  char *ch_ptr;
-
-  double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
-  double p_lrt = 0, p_score = 0;
-  double logl_H1 = 0.0;
-  int n_miss, c_phen;
-  double geno, x_mean;
+  // LOCO support
+  bool process_gwasnps = gwasnps.size();
+  if (process_gwasnps)
+    debug_msg("AnalyzeBimbam w. LOCO");
 
   // Calculate basic quantities.
   size_t n_index = (n_cvt + 2 + 1) * (n_cvt + 2) / 2;
 
-  gsl_vector *x = gsl_vector_alloc(U->size1);
-  gsl_vector *x_miss = gsl_vector_alloc(U->size1);
+  const size_t inds = U->size1;
+  gsl_vector *x = gsl_vector_alloc(inds); // #inds
+  gsl_vector *x_miss = gsl_vector_alloc(inds);
   gsl_vector *Utx = gsl_vector_alloc(U->size2);
   gsl_matrix *Uab = gsl_matrix_alloc(U->size2, n_index);
   gsl_vector *ab = gsl_vector_alloc(n_index);
 
-  // Create a large matrix.
-  size_t msize = 10000;
-  gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
-  gsl_matrix_set_zero(Xlarge);
+  // Create a large matrix with LMM_BATCH_SIZE columns for batched processing
+  // const size_t msize=(process_gwasnps ? 1 : LMM_BATCH_SIZE);
+  const size_t msize = LMM_BATCH_SIZE;
+  gsl_matrix *Xlarge = gsl_matrix_alloc(inds, msize);
+  gsl_matrix *UtXlarge = gsl_matrix_alloc(inds, msize);
 
+  enforce_msg(Xlarge && UtXlarge, "Xlarge memory check"); // just to be sure
+  gsl_matrix_set_zero(Xlarge);
   gsl_matrix_set_zero(Uab);
   CalcUab(UtW, Uty, Uab);
 
   // start reading genotypes and analyze
-  size_t c = 0, t_last = 0;
-  for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    if (indicator_snp[t] == 0) {
-      continue;
+  size_t c = 0;
+
+  igzstream infile(file_geno.c_str(), igzstream::in);
+  enforce_msg(infile, "error reading genotype file");
+
+  auto batch_compute = [&](size_t l) { // using a C++ closure
+    // Compute SNPs in batch, note the computations are independent per SNP
+    gsl_matrix_view Xlarge_sub = gsl_matrix_submatrix(Xlarge, 0, 0, inds, l);
+    gsl_matrix_view UtXlarge_sub =
+        gsl_matrix_submatrix(UtXlarge, 0, 0, inds, l);
+
+    time_start = clock();
+    eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
+                   &UtXlarge_sub.matrix);
+    time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+    gsl_matrix_set_zero(Xlarge);
+    for (size_t i = 0; i < l; i++) {
+      // for every batch...
+      gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
+      gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
+
+      CalcUab(UtW, Uty, Utx, Uab);
+
+      time_start = clock();
+      FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
+
+      double lambda_mle = 0, lambda_remle = 0, beta = 0, se = 0, p_wald = 0;
+      double p_lrt = 0, p_score = 0;
+      double logl_H1 = 0.0;
+
+      // 3 is before 1.
+      if (a_mode == 3 || a_mode == 4) {
+        CalcRLScore(l_mle_null, param1, beta, se, p_score);
+      }
+
+      if (a_mode == 1 || a_mode == 4) {
+        // for univariate a_mode is 1
+        CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle, logl_H1);
+        CalcRLWald(lambda_remle, param1, beta, se, p_wald);
+      }
+
+      if (a_mode == 2 || a_mode == 4) {
+        CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
+        p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
+      }
+
+      time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+
+      // Store summary data.
+      SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
+                      p_wald, p_lrt, p_score};
+      sumStat.push_back(SNPs);
     }
-    t_last++;
-  }
+  };
+
   for (size_t t = 0; t < indicator_snp.size(); ++t) {
-    !safeGetline(infile, line).eof();
+    // for every SNP
+    string line;
+    safeGetline(infile, line);
     if (t % d_pace == 0 || t == (ns_total - 1)) {
       ProgressBar("Reading SNPs  ", t, ns_total - 1);
     }
-    if (indicator_snp[t] == 0) {
+    if (indicator_snp[t] == 0)
       continue;
-    }
 
-    ch_ptr = strtok((char *)line.c_str(), " , \t");
+    char *ch_ptr = strtok((char *)line.c_str(), " , \t");
+    auto snp = string(ch_ptr);
+    // check whether SNP is included in gwasnps (used by LOCO)
+    if (process_gwasnps && gwasnps.count(snp) == 0)
+      continue;
     ch_ptr = strtok(NULL, " , \t");
     ch_ptr = strtok(NULL, " , \t");
 
-    x_mean = 0.0;
-    c_phen = 0;
-    n_miss = 0;
+    double x_mean = 0.0;
+    int c_phen = 0;
+    int n_miss = 0;
     gsl_vector_set_zero(x_miss);
     for (size_t i = 0; i < ni_total; ++i) {
+      // get the genotypes per individual and compute stats per SNP
       ch_ptr = strtok(NULL, " , \t");
-      if (indicator_idv[i] == 0) {
+      if (indicator_idv[i] == 0)
         continue;
-      }
 
       if (strcmp(ch_ptr, "NA") == 0) {
         gsl_vector_set(x_miss, c_phen, 0.0);
         n_miss++;
       } else {
-        geno = atof(ch_ptr);
+        double geno = atof(ch_ptr);
 
         gsl_vector_set(x, c_phen, geno);
         gsl_vector_set(x_miss, c_phen, 1.0);
@@ -1398,66 +1452,16 @@ void LMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
         gsl_vector_set(x, i, x_mean);
       }
     }
-
+    // copy genotype values for SNP into Xlarge cache
     gsl_vector_view Xlarge_col = gsl_matrix_column(Xlarge, c % msize);
     gsl_vector_memcpy(&Xlarge_col.vector, x);
-    c++;
-
-    if (c % msize == 0 || c == t_last) {
-      size_t l = 0;
-      if (c % msize == 0) {
-        l = msize;
-      } else {
-        l = c % msize;
-      }
-
-      gsl_matrix_view Xlarge_sub =
-          gsl_matrix_submatrix(Xlarge, 0, 0, Xlarge->size1, l);
-      gsl_matrix_view UtXlarge_sub =
-          gsl_matrix_submatrix(UtXlarge, 0, 0, UtXlarge->size1, l);
-
-      time_start = clock();
-      eigenlib_dgemm("T", "N", 1.0, U, &Xlarge_sub.matrix, 0.0,
-                     &UtXlarge_sub.matrix);
-      time_UtX += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-      gsl_matrix_set_zero(Xlarge);
+    c++; // count SNPs going in
 
-      for (size_t i = 0; i < l; i++) {
-        gsl_vector_view UtXlarge_col = gsl_matrix_column(UtXlarge, i);
-        gsl_vector_memcpy(Utx, &UtXlarge_col.vector);
-
-        CalcUab(UtW, Uty, Utx, Uab);
-
-        time_start = clock();
-        FUNC_PARAM param1 = {false, ni_test, n_cvt, eval, Uab, ab, 0};
-
-        // 3 is before 1.
-        if (a_mode == 3 || a_mode == 4) {
-          CalcRLScore(l_mle_null, param1, beta, se, p_score);
-        }
-
-        if (a_mode == 1 || a_mode == 4) {
-          CalcLambda('R', param1, l_min, l_max, n_region, lambda_remle,
-                     logl_H1);
-          CalcRLWald(lambda_remle, param1, beta, se, p_wald);
-        }
-
-        if (a_mode == 2 || a_mode == 4) {
-          CalcLambda('L', param1, l_min, l_max, n_region, lambda_mle, logl_H1);
-          p_lrt = gsl_cdf_chisq_Q(2.0 * (logl_H1 - logl_mle_H0), 1);
-        }
-
-        time_opt += (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
-
-        // Store summary data.
-        SUMSTAT SNPs = {beta,   se,    lambda_remle, lambda_mle,
-                        p_wald, p_lrt, p_score};
-
-        sumStat.push_back(SNPs);
-      }
-    }
+    if (c == msize)
+      batch_compute(msize);
   }
+  batch_compute(c % msize);
+  // cout << "Counted SNPs " << c << " sumStat " << sumStat.size() << endl;
   cout << endl;
 
   gsl_vector_free(x);
@@ -1505,7 +1509,7 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
   gsl_vector *ab = gsl_vector_alloc(n_index);
 
   // Create a large matrix.
-  size_t msize = 10000;
+  size_t msize = LMM_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix_set_zero(Xlarge);
@@ -1528,9 +1532,8 @@ void LMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
 
   size_t c = 0, t_last = 0;
   for (size_t t = 0; t < snpInfo.size(); ++t) {
-    if (indicator_snp[t] == 0) {
+    if (indicator_snp[t] == 0)
       continue;
-    }
     t_last++;
   }
   for (vector<SNPINFO>::size_type t = 0; t < snpInfo.size(); ++t) {
@@ -1697,7 +1700,7 @@ void LMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
   gsl_vector *ab = gsl_vector_alloc(n_index);
 
   // Create a large matrix.
-  size_t msize = 10000;
+  size_t msize = LMM_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix_set_zero(Xlarge);
diff --git a/src/lmm.h b/src/lmm.h
index c393daf..4d57ab1 100644
--- a/src/lmm.h
+++ b/src/lmm.h
@@ -26,6 +26,8 @@
 
 using namespace std;
 
+#define LMM_BATCH_SIZE 10000 // used for batch processing
+
 class FUNC_PARAM {
 
 public:
@@ -78,6 +80,7 @@ public:
   vector<int> indicator_snp;
 
   vector<SNPINFO> snpInfo; // Record SNP information.
+  set<string> setGWASnps;  // Record SNP information.
 
   // Not included in PARAM.
   vector<SUMSTAT> sumStat; // Output SNPSummary Data.
@@ -97,7 +100,8 @@ public:
                    const gsl_matrix *W, const gsl_vector *y);
   void AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
                      const gsl_matrix *UtW, const gsl_vector *Uty,
-                     const gsl_matrix *W, const gsl_vector *y);
+                     const gsl_matrix *W, const gsl_vector *y,
+                     const set<string> gwasnps);
   void AnalyzePlinkGXE(const gsl_matrix *U, const gsl_vector *eval,
                        const gsl_matrix *UtW, const gsl_vector *Uty,
                        const gsl_matrix *W, const gsl_vector *y,
diff --git a/src/mvlmm.cpp b/src/mvlmm.cpp
index f1ab3fc..eb591ca 100644
--- a/src/mvlmm.cpp
+++ b/src/mvlmm.cpp
@@ -2974,7 +2974,7 @@ void MVLMM::Analyzebgen(const gsl_matrix *U, const gsl_vector *eval,
   string line;
 
   // Create a large matrix.
-  size_t msize = 10000;
+  size_t msize = LMM_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix_set_zero(Xlarge);
@@ -3531,7 +3531,7 @@ void MVLMM::AnalyzeBimbam(const gsl_matrix *U, const gsl_vector *eval,
   size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
 
   // Create a large matrix.
-  size_t msize = 10000;
+  size_t msize = LMM_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix_set_zero(Xlarge);
@@ -3968,7 +3968,7 @@ void MVLMM::AnalyzePlink(const gsl_matrix *U, const gsl_vector *eval,
   size_t dc_size = d_size * (c_size + 1), v_size = d_size * (d_size + 1) / 2;
 
   // Create a large matrix.
-  size_t msize = 10000;
+  size_t msize = LMM_BATCH_SIZE;
   gsl_matrix *Xlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix *UtXlarge = gsl_matrix_alloc(U->size1, msize);
   gsl_matrix_set_zero(Xlarge);
diff --git a/src/param.cpp b/src/param.cpp
index 2572bbb..6ab4339 100644
--- a/src/param.cpp
+++ b/src/param.cpp
@@ -38,6 +38,54 @@
 
 using namespace std;
 
+// ---- Helper functions which do not use the PARAM scope
+
+// Calculate the SNP sets as used in LOCO from mapchr which was filled
+// from the annotation file. Returns ksnps (used for K) and gwasnps (used for
+// GWA). Both are complimentary with each other and subsets of setSnps.
+
+void LOCO_set_Snps(set<string> &ksnps, set<string> &gwasnps,
+                   const map<string, string> mapchr, const string loco) {
+  enforce_msg(ksnps.size() == 0, "make sure knsps is not initialized twice");
+  enforce_msg(gwasnps.size() == 0,
+              "make sure gwasnps is not initialized twice");
+  for (auto &kv : mapchr) {
+    auto snp = kv.first;
+    auto chr = kv.second;
+    if (chr != loco) {
+      ksnps.insert(snp);
+    } else {
+      gwasnps.insert(snp);
+    }
+  }
+}
+
+// Trim #individuals to size which is used to write tests that run faster
+//
+// Note it actually trims the number of functional individuals
+// (indicator_idv[x] == 1). This should match indicator_cvt etc. If
+// this gives problems with certain sets we can simply trim to size.
+
+void trim_individuals(vector<int> &idvs, size_t ni_max, bool debug) {
+  if (ni_max) {
+    size_t count = 0;
+    for (auto ind = idvs.begin(); ind != idvs.end(); ++ind) {
+      if (*ind)
+        count++;
+      if (count >= ni_max)
+        break;
+    }
+    if (count != idvs.size()) {
+      if (debug)
+        cout << "**** TEST MODE: trim individuals from " << idvs.size()
+             << " to " << count << endl;
+      idvs.resize(count);
+    }
+  }
+}
+
+// ---- PARAM class implementation
+
 PARAM::PARAM(void)
     : mode_silence(false), a_mode(0), k_mode(1), d_pace(100000),
       file_out("result"), path_out("./output/"), miss_level(0.05),
@@ -96,6 +144,15 @@ void PARAM::ReadFiles(void) {
     setSnps.clear();
   }
 
+  // Read KSNP set.
+  if (!file_ksnps.empty()) {
+    if (ReadFile_snps(file_ksnps, setKSnps) == false) {
+      error = true;
+    }
+  } else {
+    setKSnps.clear();
+  }
+
   // For prediction.
   if (!file_epm.empty()) {
     if (ReadFile_est(file_epm, est_column, mapRS2est) == false) {
@@ -164,6 +221,7 @@ void PARAM::ReadFiles(void) {
   } else {
     n_cvt = 1;
   }
+  trim_individuals(indicator_cvt, ni_max, mode_debug);
 
   if (!file_gxe.empty()) {
     if (ReadFile_column(file_gxe, indicator_gxe, gxe, 1) == false) {
@@ -176,6 +234,8 @@ void PARAM::ReadFiles(void) {
     }
   }
 
+  trim_individuals(indicator_idv, ni_max, mode_debug);
+
   // WJA added.
   // Read genotype and phenotype file for bgen format.
   if (!file_oxford.empty()) {
@@ -273,12 +333,15 @@ void PARAM::ReadFiles(void) {
     gsl_matrix *W = gsl_matrix_alloc(ni_test, n_cvt);
     CopyCvt(W);
 
+    trim_individuals(indicator_idv, ni_max, mode_debug);
+    trim_individuals(indicator_cvt, ni_max, mode_debug);
     if (ReadFile_geno(file_geno, setSnps, W, indicator_idv, indicator_snp,
                       maf_level, miss_level, hwe_level, r2_level, mapRS2chr,
                       mapRS2bp, mapRS2cM, snpInfo, ns_test) == false) {
       error = true;
     }
     gsl_matrix_free(W);
+
     ns_total = indicator_snp.size();
   }
 
@@ -457,6 +520,11 @@ void PARAM::ReadFiles(void) {
     // ni_test, save all useful covariates.
     ProcessCvtPhen();
   }
+
+  // Compute setKSnps when -loco is passed in
+  if (!loco.empty()) {
+    LOCO_set_Snps(setKSnps, setGWASnps, mapRS2chr, loco);
+  }
   return;
 }
 
@@ -869,22 +937,22 @@ void PARAM::CheckParam(void) {
     error = true;
   }
 
-  str = file_snps;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open snps file: " << str << endl;
-    error = true;
-  }
-
-  str = file_log;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open log file: " << str << endl;
-    error = true;
-  }
+  enforce_fexists(file_snps, "open file");
+  enforce_fexists(file_ksnps, "open file");
+  enforce_fexists(file_gwasnps, "open file");
+  enforce_fexists(file_log, "open file");
+  enforce_fexists(file_anno, "open file");
 
-  str = file_anno;
-  if (!str.empty() && stat(str.c_str(), &fileInfo) == -1) {
-    cout << "error! fail to open annotation file: " << str << endl;
-    error = true;
+  if (!loco.empty()) {
+    enforce_msg((a_mode >= 1 && a_mode <= 4) || a_mode == 21 || a_mode == 22,
+                "LOCO only works with LMM and K");
+    enforce_msg(file_bfile.empty(), "LOCO does not work with PLink (yet)");
+    enforce_msg(file_oxford.empty(), "LOCO does not work with Oxford (yet)");
+    enforce_msg(file_gxe.empty(), "LOCO does not support GXE (yet)");
+    enforce_msg(!file_anno.empty(),
+                "LOCO requires annotation file (-a switch)");
+    enforce_msg(file_ksnps.empty(), "LOCO does not allow -ksnps switch");
+    enforce_msg(file_gwasnps.empty(), "LOCO does not allow -gwasnps switch");
   }
 
   str = file_kin;
@@ -1005,7 +1073,7 @@ void PARAM::CheckData(void) {
       (indicator_cvt).size() != (indicator_idv).size()) {
     error = true;
     cout << "error! number of rows in the covariates file do not "
-         << "match the number of individuals. " << endl;
+         << "match the number of individuals. " << indicator_cvt.size() << endl;
     return;
   }
   if ((indicator_gxe).size() != 0 &&
@@ -1123,7 +1191,15 @@ void PARAM::CheckData(void) {
     if (!file_gene.empty()) {
       cout << "## number of total genes = " << ng_total << endl;
     } else if (file_epm.empty() && a_mode != 43 && a_mode != 5) {
-      cout << "## number of total SNPs = " << ns_total << endl;
+      if (!loco.empty())
+        cout << "## leave one chromosome out (LOCO) = " << loco << endl;
+      cout << "## number of total SNPs    = " << ns_total << endl;
+      if (setSnps.size())
+        cout << "## number of considered SNPS = " << setSnps.size() << endl;
+      if (setKSnps.size())
+        cout << "## number of SNPS for K    = " << setKSnps.size() << endl;
+      if (setGWASnps.size())
+        cout << "## number of SNPS for GWAS = " << setGWASnps.size() << endl;
       cout << "## number of analyzed SNPs = " << ns_test << endl;
     } else {
     }
@@ -1297,20 +1373,22 @@ void PARAM::CalcKin(gsl_matrix *matrix_kin) {
 
   if (!file_bfile.empty()) {
     file_str = file_bfile + ".bed";
+    enforce_msg(loco.empty(), "FIXME: LOCO nyi");
     if (PlinkKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
         false) {
       error = true;
     }
   } else if (!file_oxford.empty()) {
     file_str = file_oxford + ".bgen";
+    enforce_msg(loco.empty(), "FIXME: LOCO nyi");
     if (bgenKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
         false) {
       error = true;
     }
   } else {
     file_str = file_geno;
-    if (BimbamKin(file_str, indicator_snp, a_mode - 20, d_pace, matrix_kin) ==
-        false) {
+    if (BimbamKin(file_str, setKSnps, indicator_snp, a_mode - 20, d_pace,
+                  matrix_kin, ni_max == 0) == false) {
       error = true;
     }
   }
@@ -1720,37 +1798,43 @@ void PARAM::CalcS(const map<string, double> &mapRS2wA,
   } else if (!file_geno.empty()) {
     file_str = file_geno;
     if (mapRS2wA.size() == 0) {
-      if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wK,
-                    mapRS2cat, snpInfo, W, K, ns) == false) {
+      if (BimbamKinUncentered(file_str, setKSnps, d_pace, indicator_idv,
+                              indicator_snp, mapRS2wK, mapRS2cat, snpInfo, W, K,
+                              ns) == false) {
         error = true;
       }
     } else {
-      if (BimbamKin(file_str, d_pace, indicator_idv, indicator_snp, mapRS2wA,
-                    mapRS2cat, snpInfo, W, A, ns) == false) {
+      if (BimbamKinUncentered(file_str, setKSnps, d_pace, indicator_idv,
+                              indicator_snp, mapRS2wA, mapRS2cat, snpInfo, W, A,
+                              ns) == false) {
         error = true;
       }
     }
   } else if (!file_mbfile.empty()) {
     if (mapRS2wA.size() == 0) {
-      if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
-                   mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) {
+      if (MFILEKin(1, file_mbfile, setKSnps, d_pace, indicator_idv,
+                   mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K,
+                   ns) == false) {
         error = true;
       }
     } else {
-      if (MFILEKin(1, file_mbfile, d_pace, indicator_idv, mindicator_snp,
-                   mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) {
+      if (MFILEKin(1, file_mbfile, setKSnps, d_pace, indicator_idv,
+                   mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A,
+                   ns) == false) {
         error = true;
       }
     }
   } else if (!file_mgeno.empty()) {
     if (mapRS2wA.size() == 0) {
-      if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
-                   mapRS2wK, mapRS2cat, msnpInfo, W, K, ns) == false) {
+      if (MFILEKin(0, file_mgeno, setKSnps, d_pace, indicator_idv,
+                   mindicator_snp, mapRS2wK, mapRS2cat, msnpInfo, W, K,
+                   ns) == false) {
         error = true;
       }
     } else {
-      if (MFILEKin(0, file_mgeno, d_pace, indicator_idv, mindicator_snp,
-                   mapRS2wA, mapRS2cat, msnpInfo, W, A, ns) == false) {
+      if (MFILEKin(0, file_mgeno, setKSnps, d_pace, indicator_idv,
+                   mindicator_snp, mapRS2wA, mapRS2cat, msnpInfo, W, A,
+                   ns) == false) {
         error = true;
       }
     }
diff --git a/src/param.h b/src/param.h
index 33e2431..45d8c0f 100644
--- a/src/param.h
+++ b/src/param.h
@@ -19,12 +19,15 @@
 #ifndef __PARAM_H__
 #define __PARAM_H__
 
+#include "debug.h"
 #include "gsl/gsl_matrix.h"
 #include "gsl/gsl_vector.h"
 #include <map>
 #include <set>
 #include <vector>
 
+#define K_BATCH_SIZE 10000 // #snps used for batched K
+
 using namespace std;
 
 class SNPINFO {
@@ -110,6 +113,7 @@ class PARAM {
 public:
   // IO-related parameters.
   bool mode_silence;
+  bool mode_debug = false;
   int a_mode; // Analysis mode, 1/2/3/4 for Frequentist tests
   int k_mode; // Kinship read mode: 1: n by n matrix, 2: id/id/k_value;
   vector<size_t> p_column; // Which phenotype column needs analysis.
@@ -135,12 +139,14 @@ public:
   string file_bf, file_hyp;
   string path_out;
 
-  string file_epm;  // Estimated parameter file.
-  string file_ebv;  // Estimated breeding value file.
-  string file_log;  // Log file containing mean estimate.
-  string file_read; // File containing total number of reads.
-  string file_gene; // Gene expression file.
-  string file_snps; // File containing analyzed SNPs or genes.
+  string file_epm;     // Estimated parameter file.
+  string file_ebv;     // Estimated breeding value file.
+  string file_log;     // Log file containing mean estimate.
+  string file_read;    // File containing total number of reads.
+  string file_gene;    // Gene expression file.
+  string file_snps;    // File containing analyzed SNPs or genes.
+  string file_ksnps;   // File SNPs for computing K
+  string file_gwasnps; // File SNPs for computing GWAS
 
   // WJA added.
   string file_oxford;
@@ -152,6 +158,7 @@ public:
   double r2_level;
 
   // LMM-related parameters.
+  string loco;
   double l_min;
   double l_max;
   size_t n_region;
@@ -215,6 +222,7 @@ public:
 
   // Number of individuals.
   size_t ni_total, ni_test, ni_cvt, ni_study, ni_ref;
+  size_t ni_max = 0; // -nind switch for testing purposes
 
   // Number of observed and missing phenotypes.
   size_t np_obs, np_miss;
@@ -305,7 +313,9 @@ public:
 
   vector<SNPINFO> snpInfo;          // Record SNP information.
   vector<vector<SNPINFO>> msnpInfo; // Record SNP information.
-  set<string> setSnps;              // Set of snps for analysis.
+  set<string> setSnps;              // Set of snps for analysis (-snps).
+  set<string> setKSnps;             // Set of snps for K (-ksnps and LOCO)
+  set<string> setGWASnps;           // Set of snps for GWA (-gwasnps and LOCO)
 
   // Constructor.
   PARAM();
@@ -351,4 +361,10 @@ public:
 
 size_t GetabIndex(const size_t a, const size_t b, const size_t n_cvt);
 
+// Helpers for checking parameters
+#define enforce_fexists(fn, msg)                                               \
+  if (!fn.empty())                                                             \
+    enforce_msg(stat(fn.c_str(), &fileInfo) == 0,                              \
+                ((std::string(__STRING(fn)) + ": " + msg).c_str()));
+
 #endif
diff --git a/src/vc.h b/src/vc.h
index c6f66b4..49397bb 100644
--- a/src/vc.h
+++ b/src/vc.h
@@ -40,6 +40,8 @@ public:
   bool noconstrain;
 };
 
+// Methods for variant component (VC) estimation
+
 class VC {
 
 public: