1 files changed, 104 insertions, 71 deletions
diff --git a/src/gemma.cpp b/src/gemma.cpp
index 6c2b3f4..a39d67a 100644
--- a/src/gemma.cpp
+++ b/src/gemma.cpp
@@ -62,6 +62,7 @@ extern "C" {
 #include "vc.h"
 #include "debug.h"
 #include "version.h"
+#include <guile_api.h>
 
 using namespace std;
 
@@ -81,10 +82,12 @@ void gemma_gsl_error_handler (const char * reason,
 #include <openblas_config.h>
 #endif
 
+using namespace gemmaguile;
+
 void GEMMA::PrintHeader(void) {
 
   cout <<
-    "GEMMA forked executable --- part of PanGEMMA " << version << " (" << date << ") by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl;
+      "Pangemma " << version << " --- GEMMA 0.98.5 compatible executable (" << date << ") with" <<  OPENBLAS_VERSION << "and guile " << global_guile_version() << endl << "by Xiang Zhou, Pjotr Prins and team (C) 2012-" << year << endl ;
   return;
 }
 
@@ -1653,7 +1656,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
   clock_t time_begin, time_start;
   time_begin = clock();
 
-  if (is_check_mode()) enable_segfpe(); // fast NaN checking by default
+  // if (is_check_mode()) enable_segfpe(); // fast NaN checking by default
 
   // Read Files.
   cout << "Reading Files ... " << endl;
@@ -1908,32 +1911,52 @@ void GEMMA::BatchRun(PARAM &cPar) {
   }
 
   // Generate Kinship matrix (optionally using LOCO)
-  if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) {
+  // if (cPar.a_mode == M_KIN || cPar.a_mode == M_KIN2) {
+  if (is_kinship_compute(cPar.a_mode)) {
     cout << "Calculating Relatedness Matrix ... " << endl;
 
-    gsl_matrix *G = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
-    enforce_msg(G, "allocate G"); // just to be sure
+    if (cPar.is_mdb) {
+      time_start = clock();
+      auto K = cPar.MdbCalcKin();
+      cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
+      if (cPar.a_mode == M_KIN) {
+        cPar.WriteMatrix(K, "cXX");
+      } else { // M_KIN2
+        cPar.WriteMatrix(K, "sXX");
+      }
+
+      gsl_matrix_safe_free(K);
+      return;
+    }
+
+    enforce(cPar.ni_total > 0);
+    gsl_matrix *K = gsl_matrix_safe_alloc(cPar.ni_total, cPar.ni_total);
+    enforce_msg(K, "allocate G"); // just to be sure
 
     time_start = clock();
 
-    cPar.CalcKin(G);
+    if (cPar.error == true) {
+      cout << "error! failed to prepare for calculating relatedness matrix. " << endl;
+      return;
+    }
+    cPar.CalcKin(K);
 
     cPar.time_G = (clock() - time_start) / (double(CLOCKS_PER_SEC) * 60.0);
     if (cPar.error == true) {
-      cout << "error! fail to calculate relatedness matrix. " << endl;
+      cout << "error! failed to calculate relatedness matrix. " << endl;
       return;
     }
 
     // Now we have the Kinship matrix test it
-    validate_K(G);
+    validate_K(K);
 
     if (cPar.a_mode == M_KIN) {
-      cPar.WriteMatrix(G, "cXX");
+      cPar.WriteMatrix(K, "cXX");
     } else { // M_KIN2
-      cPar.WriteMatrix(G, "sXX");
+      cPar.WriteMatrix(K, "sXX");
     }
 
-    gsl_matrix_safe_free(G);
+    gsl_matrix_safe_free(K);
   }
 
   // Compute the LDSC weights (not implemented yet)
@@ -1943,7 +1966,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     VARCOV cVarcov;
     cVarcov.CopyFromParam(cPar);
 
-    if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
+    // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
 
     if (!cPar.file_bfile.empty()) {
       cVarcov.AnalyzePlink();
@@ -2058,7 +2081,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     VARCOV cVarcov;
     cVarcov.CopyFromParam(cPar);
 
-    if (is_check_mode()) disable_segfpe(); // fast NaN checking for now
+    // if (is_check_mode()) disable_segfpe(); // fast NaN checking for now
 
     if (!cPar.file_bfile.empty()) {
       cVarcov.AnalyzePlink();
@@ -2569,19 +2592,23 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.a_mode == M_LMM4 || cPar.a_mode == M_LMM5 || cPar.a_mode == M_LMM9 ||
       cPar.a_mode == M_EIGEN) { // Fit LMM or mvLMM or eigen
     write(cPar.a_mode, "Mode");
-    gsl_matrix *Y = gsl_matrix_safe_alloc(cPar.ni_test, cPar.n_ph);
+    auto n_ph    = cPar.n_ph;                                      // # of trait values
+    auto ni_test = cPar.ni_test;                                   // # of individuals to test
+    enforce(n_ph > 0);
+    enforce(ni_test > 0);
+    gsl_matrix *Y = gsl_matrix_safe_alloc(ni_test, n_ph);          // Y is ind x phenotypes
     enforce_msg(Y, "allocate Y"); // just to be sure
-    gsl_matrix *W = gsl_matrix_safe_alloc(Y->size1, cPar.n_cvt);
-    gsl_matrix *B = gsl_matrix_safe_alloc(Y->size2, W->size2); // B is a d by c
-                                                          // matrix
-    gsl_matrix *se_B = gsl_matrix_safe_alloc(Y->size2, W->size2);
-    gsl_matrix *G = gsl_matrix_safe_alloc(Y->size1, Y->size1);
-    gsl_matrix *U = gsl_matrix_safe_alloc(Y->size1, Y->size1);
-    gsl_matrix *UtW = gsl_matrix_calloc(Y->size1, W->size2);
-    gsl_matrix *UtY = gsl_matrix_calloc(Y->size1, Y->size2);
-    gsl_vector *eval = gsl_vector_calloc(Y->size1);
-    gsl_vector *env = gsl_vector_safe_alloc(Y->size1);
-    gsl_vector *weight = gsl_vector_safe_alloc(Y->size1);
+    gsl_matrix *W = gsl_matrix_safe_alloc(ni_test, cPar.n_cvt);    // W is ind x covariates
+    gsl_matrix *B = gsl_matrix_safe_alloc(n_ph, W->size2);         // B is a d by c (effect size)
+    gsl_matrix *se_B = gsl_matrix_safe_alloc(n_ph, W->size2);      // se
+    enforce(ni_test > 0);
+    gsl_matrix *K = gsl_matrix_safe_alloc(ni_test, ni_test);       // Kinship aka as G/GRM in the equation ind x ind
+    gsl_matrix *U = gsl_matrix_safe_alloc(ni_test, ni_test);
+    gsl_matrix *UtW = gsl_matrix_calloc(ni_test, W->size2);        // Transformed ind x covariates
+    gsl_matrix *UtY = gsl_matrix_calloc(ni_test, n_ph);            // Transformed ind x phenotypes
+    gsl_vector *eval = gsl_vector_calloc(ni_test);                 // eigen values
+    gsl_vector *env = gsl_vector_safe_alloc(ni_test);              // E environment
+    gsl_vector *weight = gsl_vector_safe_alloc(ni_test);           // weights
     debug_msg("Started on LMM");
     assert_issue(is_issue(26), UtY->data[0] == 0.0);
 
@@ -2592,38 +2619,38 @@ void GEMMA::BatchRun(PARAM &cPar) {
       cPar.CopyGxe(env);
     }
 
-    // read relatedness matrix G
+    // read relatedness matrix K
     if (!(cPar.file_kin).empty()) {
       ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
-                   cPar.k_mode, cPar.error, G);
+                   cPar.k_mode, cPar.error, K);
       debug_msg("Read K/GRM file");
       if (cPar.error == true) {
         cout << "error! fail to read kinship/relatedness file. " << endl;
         return;
       }
 
-      // center matrix G
-      CenterMatrix(G);
-      validate_K(G);
-      write(G, "G");
+      // center matrix K
+      CenterMatrix(K);
+      validate_K(K);
+      write(K, "K");
 
       // is residual weights are provided, then
       if (!cPar.file_weight.empty()) {
         cPar.CopyWeight(weight);
         double d, wi, wj;
-        for (size_t i = 0; i < G->size1; i++) {
+        for (size_t i = 0; i < K->size1; i++) {
           wi = gsl_vector_get(weight, i);
-          for (size_t j = i; j < G->size2; j++) {
+          for (size_t j = i; j < K->size2; j++) {
             wj = gsl_vector_get(weight, j);
-            d = gsl_matrix_get(G, i, j);
+            d = gsl_matrix_get(K, i, j);
             if (wi <= 0 || wj <= 0) {
               d = 0;
             } else {
               d /= safe_sqrt(wi * wj);
             }
-            gsl_matrix_set(G, i, j, d);
+            gsl_matrix_set(K, i, j, d);
             if (j != i) {
-              gsl_matrix_set(G, j, i, d);
+              gsl_matrix_set(K, j, i, d);
             }
           }
         }
@@ -2634,9 +2661,9 @@ void GEMMA::BatchRun(PARAM &cPar) {
       time_start = clock();
 
       if (cPar.a_mode == M_EIGEN) {
-        cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
+        cPar.trace_G = EigenDecomp_Zeroed(K, U, eval, 0);
       } else {
-        cPar.trace_G = EigenDecomp_Zeroed(G, U, eval, 0);
+        cPar.trace_G = EigenDecomp_Zeroed(K, U, eval, 0);
       }
       // write(eval,"eval");
 
@@ -2693,8 +2720,8 @@ void GEMMA::BatchRun(PARAM &cPar) {
       LMM cLmm;
       cLmm.CopyFromParam(cPar);
 
-      gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
-      gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+      gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get a single phenotype column
+      gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and transformed column
 
       cLmm.AnalyzeGene(U, eval, UtW, &UtY_col.vector, W,
                        &Y_col.vector); // y is the predictor, not the phenotype
@@ -2811,34 +2838,40 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
           // if (is_check_mode()) disable_segfpe(); // disable fast NaN checking for now
 
-          gsl_vector_view Y_col = gsl_matrix_column(Y, 0);
-          gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0);
+          gsl_vector_view Y_col = gsl_matrix_column(Y, 0); // get pheno column
+          gsl_vector_view UtY_col = gsl_matrix_column(UtY, 0); // and its transformed
           write(&Y_col.vector, "Y_col");
 
-          if (!cPar.file_bfile.empty()) {
-            // PLINK analysis
-            if (cPar.file_gxe.empty()) {
-              cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
-                                &Y_col.vector, cPar.setGWASnps);
-            }
-            else {
-              cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
-                                   &Y_col.vector, env);
-            }
+          if (cPar.is_mdb) {
+            cLmm.mdb_calc_gwa(U, eval, UtW, &UtY_col.vector, W, &Y_col.vector, cPar.loco);
+            cLmm.CopyToParam(cPar);
           }
           else {
-            // BIMBAM analysis
-
-            if (cPar.file_gxe.empty()) {
-              cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
-                                 &Y_col.vector, cPar.setGWASnps);
-            } else {
-              cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
-                                    &Y_col.vector, env);
+            if (!cPar.file_bfile.empty()) {
+              // PLINK analysis
+              if (cPar.file_gxe.empty()) {
+                cLmm.AnalyzePlink(U, eval, UtW, &UtY_col.vector, W,
+                                  &Y_col.vector, cPar.setGWASnps);
+              }
+              else {
+                cLmm.AnalyzePlinkGXE(U, eval, UtW, &UtY_col.vector, W,
+                                     &Y_col.vector, env);
+              }
+            }
+            else {
+              // BIMBAM analysis
+
+              if (cPar.file_gxe.empty()) {
+                cLmm.AnalyzeBimbam(U, eval, UtW, &UtY_col.vector, W,
+                                   &Y_col.vector, cPar.setGWASnps);
+              } else {
+                cLmm.AnalyzeBimbamGXE(U, eval, UtW, &UtY_col.vector, W,
+                                      &Y_col.vector, env);
+              }
             }
+            cLmm.WriteFiles(); // write output files
+            cLmm.CopyToParam(cPar);
           }
-          cLmm.WriteFiles();
-          cLmm.CopyToParam(cPar);
         } else {
           debug_msg("fit mvLMM (multiple phenotypes)");
           MVLMM cMvlmm;
@@ -2873,7 +2906,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     gsl_matrix_safe_free(W);
     gsl_matrix_warn_free(B); // sometimes unused
     gsl_matrix_warn_free(se_B);
-    gsl_matrix_warn_free(G);
+    gsl_matrix_warn_free(K);
     gsl_matrix_safe_free(U);
     gsl_matrix_safe_free(UtW);
     gsl_matrix_safe_free(UtY);
@@ -2898,7 +2931,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
     // run bvsr if rho==1
     if (cPar.rho_min == 1 && cPar.rho_max == 1) {
       // read genotypes X (not UtX)
-      cPar.ReadGenotypes(UtX, G, false);
+      cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
       // perform BSLMM analysis
       BSLMM cBslmm;
@@ -2916,7 +2949,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
       // read relatedness matrix G
       if (!(cPar.file_kin).empty()) {
-        cPar.ReadGenotypes(UtX, G, false);
+        cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
         // read relatedness matrix G
         ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
@@ -2930,7 +2963,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
         CenterMatrix(G);
         validate_K(G);
       } else {
-        cPar.ReadGenotypes(UtX, G, true);
+        cPar.ReadBIMBAMGenotypes(UtX, G, true);
       }
 
       // eigen-decomposition and calculate trace_G
@@ -3008,7 +3041,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
       // run bvsr if rho==1
       if (cPar.rho_min == 1 && cPar.rho_max == 1) {
         // read genotypes X (not UtX)
-        cPar.ReadGenotypes(UtX, G, false);
+        cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
         // perform BSLMM analysis
         BSLMM cBslmm;
@@ -3027,7 +3060,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
 
         // read relatedness matrix G
         if (!(cPar.file_kin).empty()) {
-          cPar.ReadGenotypes(UtX, G, false);
+          cPar.ReadBIMBAMGenotypes(UtX, G, false);
 
           // read relatedness matrix G
           ReadFile_kin(cPar.file_kin, cPar.indicator_idv, cPar.mapID2num,
@@ -3042,7 +3075,7 @@ void GEMMA::BatchRun(PARAM &cPar) {
           validate_K(G);
 
         } else {
-          cPar.ReadGenotypes(UtX, G, true);
+          cPar.ReadBIMBAMGenotypes(UtX, G, true);
         }
 
         // eigen-decomposition and calculate trace_G
@@ -3168,8 +3201,8 @@ void GEMMA::WriteLog(int argc, char **argv, PARAM &cPar) {
   }
 
   outfile << "##" << endl;
-  outfile << "## GEMMA Version    = " << version << " (" << date << ")" << endl;
-  outfile << "## Build profile    = " << GEMMA_PROFILE << endl ;
+  outfile << "## PANGEMMA Version = " << version << " (" << date << ")" << endl;
+  // outfile << "## Build profile    = " << GEMMA_PROFILE << endl ;
   #ifdef __GNUC__
   outfile << "## GCC version      = " << __GNUC__ << "." << __GNUC_MINOR__ << "." << __GNUC_PATCHLEVEL__ << endl;
   #endif